#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
  automatically switches over to use inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

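/*
   Illustrative sketch (not part of this file's code): the usage pattern recommended in the MATAIJ
   notes above, calling both preallocation routines so the same code runs with one process or many.
   The local size n and the per-row nonzero estimates (5 diagonal, 2 off-diagonal) are placeholder
   values chosen for the example.

     Mat            A;
     PetscInt       n = 100;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,n,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/
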
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

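/*
   Illustrative sketch (assumed usage, not taken from this file): the implementation above is
   normally reached through the generic MatFindZeroDiagonals() interface, for example to locate
   rows whose diagonal entry is missing or zero before applying a preconditioner that must avoid
   them.

     Mat            A;
     IS             zrows;
     PetscErrorCode ierr;

     ierr = MatFindZeroDiagonals(A,&zrows);CHKERRQ(ierr);
     ierr = ISView(zrows,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = ISDestroy(&zrows);CHKERRQ(ierr);
*/
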
#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

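/*
   Illustrative sketch (assumed usage, not taken from this file): the routine above is reached
   through MatGetColumnNorms(); every process receives the norms of all N global columns, so the
   output array must have global length.

     Mat            A;
     PetscInt       N;
     PetscReal      *norms;
     PetscErrorCode ierr;

     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/
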
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order-N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else                high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else                high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

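/*
   Illustrative sketch (assumed usage, not taken from this file): entries for locally owned rows go
   directly into the diagonal (A) or off-diagonal (B) block above, while entries for rows owned by
   other processes are stashed and communicated during assembly.  The indices and values below are
   placeholders.

     Mat            A;
     PetscInt       row = 0, cols[2] = {0, 1};
     PetscScalar    vals[2] = {2.0, -1.0};
     PetscErrorCode ierr;

     ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
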
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

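/*
   Illustrative sketch (assumed usage, not taken from this file): as the error message above notes,
   only locally owned rows may be queried, so callers normally restrict requests to the ownership
   range of the calling process.  The 2x2 block of indices is a placeholder.

     Mat            A;
     PetscInt       rstart,rows[2],cols[2];
     PetscScalar    vals[4];
     PetscErrorCode ierr;

     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
     rows[0] = rstart; rows[1] = rstart+1;
     cols[0] = 0;      cols[1] = 1;
     ierr = MatGetValues(A,2,rows,2,cols,vals);CHKERRQ(ierr);
*/
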
extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt       *owners = A->rmap->range;
  PetscInt       n       = A->rmap->n;
  PetscSF        sf;
  PetscInt       *lrows;
  PetscSFNode    *rrows;
  PetscInt       r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    if (A->nooffproczerorows) {
      if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
      lrows[len++] = idx - owners[p];
    } else {
      rrows[r].rank  = p;
      rrows[r].index = rows[r] - owners[p];
    }
  }
  if (!A->nooffproczerorows) {
    ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
    /* Collect flags for rows to be zeroed */
    ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
    /* Compress and put in row numbers */
    for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  }
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

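/*
   Illustrative sketch (assumed usage, not taken from this file): MatZeroRows() is commonly used to
   impose Dirichlet boundary conditions after assembly; passing the solution vector x and right hand
   side b lets the implementation above also adjust b so the zeroed rows keep the prescribed values.
   The row list is a placeholder.

     Mat            A;
     Vec            x,b;
     PetscInt       rows[1] = {0};
     PetscErrorCode ierr;

     ierr = MatZeroRows(A,1,rows,1.0,x,b);CHKERRQ(ierr);
*/
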
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

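/*
   Illustrative sketch (assumed usage, not taken from this file): the multiply kernels above expect
   vectors whose parallel layouts are compatible with the matrix, which is easiest to guarantee by
   creating them with MatGetVecs().

     Mat            A;
     Vec            x,y;
     PetscErrorCode ierr;

     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = VecDestroy(&x);CHKERRQ(ierr);
     ierr = VecDestroy(&y);CHKERRQ(ierr);
*/
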
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

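/*
   Illustrative sketch (assumed usage, not taken from this file): checking whether one parallel AIJ
   matrix is the transpose of another within a tolerance, using the generic interface that dispatches
   to the routine above.

     Mat            A,B;
     PetscBool      flg;
     PetscErrorCode ierr;

     ierr = MatIsTranspose(A,B,1.e-12,&flg);CHKERRQ(ierr);
     if (flg) { ierr = PetscPrintf(PETSC_COMM_WORLD,"B is the transpose of A\n");CHKERRQ(ierr); }
*/
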
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

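/*
   Illustrative sketch (assumed usage, not taken from this file): extracting the diagonal of a
   square MPIAIJ matrix into a vector with the matching row layout, then rescaling the matrix.

     Mat            A;
     Vec            d;
     PetscErrorCode ierr;

     ierr = MatGetVecs(A,NULL,&d);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
     ierr = MatScale(A,0.5);CHKERRQ(ierr);
     ierr = VecDestroy(&d);CHKERRQ(ierr);
*/
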
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_Redundant"
PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund = *redundant;
  PetscInt       i;

  PetscFunctionBegin;
  *redundant = NULL;
  if (redund){
    if (redund->matseq) { /* via MatGetSubMatrices() */
      ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
      ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
      ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
      ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
    } else {
      ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
      for (i=0; i<redund->nrecvs; i++) {
        ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
        ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
      }
      ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
    }

    if (redund->psubcomm) {
      ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
    }
    ierr = PetscFree(redund);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}

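/*
   Illustrative sketch (assumed usage, not taken from this file): the binary path above is reached
   by viewing an assembled matrix with a binary viewer; MatLoad() reads the file back, and the
   -matload_block_size hint written to the companion .info file can override the block size on
   load.  The file name is a placeholder.

     Mat            A,B;
     PetscViewer    viewer;
     PetscErrorCode ierr;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATAIJ);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
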
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

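/*
   Illustrative sketch (assumed usage, not taken from this file): the PETSC_VIEWER_ASCII_INFO_DETAIL
   branch above, which reports per-process nonzero counts and the VecScatter used in MatMult(), can
   be requested programmatically by pushing that format on a standard viewer before viewing.

     Mat            A;
     PetscErrorCode ierr;

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/
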
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

*/ 1382 Mat A; 1383 Mat_SeqAIJ *Aloc; 1384 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1385 MatScalar *a; 1386 1387 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1388 if (!rank) { 1389 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1390 } else { 1391 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1392 } 1393 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1394 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1395 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1396 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1397 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1398 1399 /* copy over the A part */ 1400 Aloc = (Mat_SeqAIJ*)aij->A->data; 1401 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1402 row = mat->rmap->rstart; 1403 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1404 for (i=0; i<m; i++) { 1405 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1406 row++; 1407 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1408 } 1409 aj = Aloc->j; 1410 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1411 1412 /* copy over the B part */ 1413 Aloc = (Mat_SeqAIJ*)aij->B->data; 1414 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1415 row = mat->rmap->rstart; 1416 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1417 ct = cols; 1418 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1419 for (i=0; i<m; i++) { 1420 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1421 row++; 1422 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1423 } 1424 ierr = PetscFree(ct);CHKERRQ(ierr); 1425 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1426 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1427 /* 1428 Everyone has to call to draw the matrix since the graphics waits are 1429 synchronized across all processors that share the PetscDraw object 1430 */ 1431 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1432 if (!rank) { 1433 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1434 } 1435 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1436 ierr = MatDestroy(&A);CHKERRQ(ierr); 1437 } 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatView_MPIAIJ" 1443 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1444 { 1445 PetscErrorCode ierr; 1446 PetscBool iascii,isdraw,issocket,isbinary; 1447 1448 PetscFunctionBegin; 1449 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1450 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1451 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1452 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1453 if (iascii || isdraw || isbinary || issocket) { 1454 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatSOR_MPIAIJ" 1461 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1462 { 1463 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1464 PetscErrorCode ierr; 1465 Vec bb1 = 0; 1466 PetscBool hasop; 1467 1468 PetscFunctionBegin; 1469 if (flag == SOR_APPLY_UPPER) { 1470 ierr 
= (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1471 PetscFunctionReturn(0); 1472 } 1473 1474 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1475 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1476 } 1477 1478 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1479 if (flag & SOR_ZERO_INITIAL_GUESS) { 1480 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1481 its--; 1482 } 1483 1484 while (its--) { 1485 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1486 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1487 1488 /* update rhs: bb1 = bb - B*x */ 1489 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1490 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1491 1492 /* local sweep */ 1493 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1494 } 1495 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1496 if (flag & SOR_ZERO_INITIAL_GUESS) { 1497 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1498 its--; 1499 } 1500 while (its--) { 1501 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1502 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1503 1504 /* update rhs: bb1 = bb - B*x */ 1505 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1506 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1507 1508 /* local sweep */ 1509 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1510 } 1511 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1512 if (flag & SOR_ZERO_INITIAL_GUESS) { 1513 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1514 its--; 1515 } 1516 while (its--) { 1517 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1518 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1519 1520 /* update rhs: bb1 = bb - B*x */ 1521 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1522 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1523 1524 /* local sweep */ 1525 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1526 } 1527 } else if (flag & SOR_EISENSTAT) { 1528 Vec xx1; 1529 1530 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1531 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1532 1533 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1534 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1535 if (!mat->diag) { 1536 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1537 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1538 } 1539 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1540 if (hasop) { 1541 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1542 } else { 1543 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1544 } 1545 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1546 1547 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1548 1549 /* local sweep */ 1550 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1551 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1552 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1553 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1554 1555 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1556 PetscFunctionReturn(0); 1557 } 1558 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatPermute_MPIAIJ" 1561 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1562 { 1563 Mat aA,aB,Aperm; 1564 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1565 PetscScalar *aa,*ba; 1566 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1567 PetscSF rowsf,sf; 1568 IS parcolp = NULL; 1569 PetscBool done; 1570 PetscErrorCode ierr; 1571 1572 PetscFunctionBegin; 1573 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1574 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1575 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1576 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1577 1578 /* Invert row permutation to find out where my rows should go */ 1579 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1580 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1581 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1582 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1583 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1584 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1585 1586 /* Invert column permutation to find out where my columns should go */ 1587 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1588 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1589 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1590 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1591 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1592 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1593 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1594 1595 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1596 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1597 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1598 1599 /* Find out where my gcols should go */ 1600 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1601 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1602 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1603 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1604 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1605 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1606 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1607 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1608 1609 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1610 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1611 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1612 for (i=0; i<m; i++) { 1613 PetscInt row = rdest[i],rowner; 1614 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1615 for (j=ai[i]; j<ai[i+1]; j++) { 1616 PetscInt cowner,col = cdest[aj[j]]; 1617 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1618 if (rowner == cowner) dnnz[i]++; 1619 
else onnz[i]++; 1620 } 1621 for (j=bi[i]; j<bi[i+1]; j++) { 1622 PetscInt cowner,col = gcdest[bj[j]]; 1623 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1624 if (rowner == cowner) dnnz[i]++; 1625 else onnz[i]++; 1626 } 1627 } 1628 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1629 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1630 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1631 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1632 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1633 1634 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1635 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1636 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1637 for (i=0; i<m; i++) { 1638 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1639 PetscInt j0,rowlen; 1640 rowlen = ai[i+1] - ai[i]; 1641 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1642 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1643 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1644 } 1645 rowlen = bi[i+1] - bi[i]; 1646 for (j0=j=0; j<rowlen; j0=j) { 1647 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1648 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1649 } 1650 } 1651 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1652 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1653 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1654 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1655 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1656 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1657 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1658 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1659 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1660 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1661 *B = Aperm; 1662 PetscFunctionReturn(0); 1663 } 1664 1665 #undef __FUNCT__ 1666 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1667 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1668 { 1669 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1670 Mat A = mat->A,B = mat->B; 1671 PetscErrorCode ierr; 1672 PetscReal isend[5],irecv[5]; 1673 1674 PetscFunctionBegin; 1675 info->block_size = 1.0; 1676 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1677 1678 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1679 isend[3] = info->memory; isend[4] = info->mallocs; 1680 1681 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1682 1683 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1684 isend[3] += info->memory; isend[4] += info->mallocs; 1685 if (flag == MAT_LOCAL) { 1686 info->nz_used = isend[0]; 1687 info->nz_allocated = isend[1]; 1688 info->nz_unneeded = isend[2]; 1689 info->memory = isend[3]; 1690 info->mallocs = isend[4]; 1691 } else if (flag == MAT_GLOBAL_MAX) { 1692 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1693 1694 info->nz_used = irecv[0]; 1695 info->nz_allocated = irecv[1]; 1696 info->nz_unneeded = irecv[2]; 1697 info->memory = irecv[3]; 1698 info->mallocs = irecv[4]; 1699 } else 
if (flag == MAT_GLOBAL_SUM) { 1700 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1701 1702 info->nz_used = irecv[0]; 1703 info->nz_allocated = irecv[1]; 1704 info->nz_unneeded = irecv[2]; 1705 info->memory = irecv[3]; 1706 info->mallocs = irecv[4]; 1707 } 1708 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1709 info->fill_ratio_needed = 0; 1710 info->factor_mallocs = 0; 1711 PetscFunctionReturn(0); 1712 } 1713 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatSetOption_MPIAIJ" 1716 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1717 { 1718 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1719 PetscErrorCode ierr; 1720 1721 PetscFunctionBegin; 1722 switch (op) { 1723 case MAT_NEW_NONZERO_LOCATIONS: 1724 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1725 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1726 case MAT_KEEP_NONZERO_PATTERN: 1727 case MAT_NEW_NONZERO_LOCATION_ERR: 1728 case MAT_USE_INODES: 1729 case MAT_IGNORE_ZERO_ENTRIES: 1730 MatCheckPreallocated(A,1); 1731 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1732 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1733 break; 1734 case MAT_ROW_ORIENTED: 1735 a->roworiented = flg; 1736 1737 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1738 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1739 break; 1740 case MAT_NEW_DIAGONALS: 1741 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1742 break; 1743 case MAT_IGNORE_OFF_PROC_ENTRIES: 1744 a->donotstash = flg; 1745 break; 1746 case MAT_SPD: 1747 A->spd_set = PETSC_TRUE; 1748 A->spd = flg; 1749 if (flg) { 1750 A->symmetric = PETSC_TRUE; 1751 A->structurally_symmetric = PETSC_TRUE; 1752 A->symmetric_set = PETSC_TRUE; 1753 A->structurally_symmetric_set = PETSC_TRUE; 1754 } 1755 break; 1756 case MAT_SYMMETRIC: 1757 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1758 break; 1759 case MAT_STRUCTURALLY_SYMMETRIC: 1760 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1761 break; 1762 case MAT_HERMITIAN: 1763 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1764 break; 1765 case MAT_SYMMETRY_ETERNAL: 1766 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1767 break; 1768 default: 1769 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1770 } 1771 PetscFunctionReturn(0); 1772 } 1773 1774 #undef __FUNCT__ 1775 #define __FUNCT__ "MatGetRow_MPIAIJ" 1776 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1777 { 1778 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1779 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1780 PetscErrorCode ierr; 1781 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1782 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1783 PetscInt *cmap,*idx_p; 1784 1785 PetscFunctionBegin; 1786 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1787 mat->getrowactive = PETSC_TRUE; 1788 1789 if (!mat->rowvalues && (idx || v)) { 1790 /* 1791 allocate enough space to hold information from the longest row. 
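         A row of an MPIAIJ matrix is stored in two pieces, the diagonal block mat->A
         and the off-diagonal block mat->B, so MatGetRow() has to merge the two pieces
         into a single array ordered by global column number. The scratch arrays
         mat->rowvalues and mat->rowindices are therefore sized to the largest combined
         row length over all local rows; they are allocated once here and reused by
         every later call to MatGetRow() on this matrix.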
1792 */ 1793 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1794 PetscInt max = 1,tmp; 1795 for (i=0; i<matin->rmap->n; i++) { 1796 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1797 if (max < tmp) max = tmp; 1798 } 1799 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1800 } 1801 1802 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1803 lrow = row - rstart; 1804 1805 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1806 if (!v) {pvA = 0; pvB = 0;} 1807 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1808 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1809 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1810 nztot = nzA + nzB; 1811 1812 cmap = mat->garray; 1813 if (v || idx) { 1814 if (nztot) { 1815 /* Sort by increasing column numbers, assuming A and B already sorted */ 1816 PetscInt imark = -1; 1817 if (v) { 1818 *v = v_p = mat->rowvalues; 1819 for (i=0; i<nzB; i++) { 1820 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1821 else break; 1822 } 1823 imark = i; 1824 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1825 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1826 } 1827 if (idx) { 1828 *idx = idx_p = mat->rowindices; 1829 if (imark > -1) { 1830 for (i=0; i<imark; i++) { 1831 idx_p[i] = cmap[cworkB[i]]; 1832 } 1833 } else { 1834 for (i=0; i<nzB; i++) { 1835 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1836 else break; 1837 } 1838 imark = i; 1839 } 1840 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1841 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1842 } 1843 } else { 1844 if (idx) *idx = 0; 1845 if (v) *v = 0; 1846 } 1847 } 1848 *nz = nztot; 1849 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1850 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1851 PetscFunctionReturn(0); 1852 } 1853 1854 #undef __FUNCT__ 1855 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1856 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1857 { 1858 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1859 1860 PetscFunctionBegin; 1861 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1862 aij->getrowactive = PETSC_FALSE; 1863 PetscFunctionReturn(0); 1864 } 1865 1866 #undef __FUNCT__ 1867 #define __FUNCT__ "MatNorm_MPIAIJ" 1868 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1869 { 1870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1871 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1872 PetscErrorCode ierr; 1873 PetscInt i,j,cstart = mat->cmap->rstart; 1874 PetscReal sum = 0.0; 1875 MatScalar *v; 1876 1877 PetscFunctionBegin; 1878 if (aij->size == 1) { 1879 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1880 } else { 1881 if (type == NORM_FROBENIUS) { 1882 v = amat->a; 1883 for (i=0; i<amat->nz; i++) { 1884 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1885 } 1886 v = bmat->a; 1887 for (i=0; i<bmat->nz; i++) { 1888 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1889 } 1890 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1891 *norm = PetscSqrtReal(*norm); 1892 } else if (type == NORM_1) { /* max column norm */ 1893 PetscReal *tmp,*tmp2; 1894 PetscInt *jj,*garray = aij->garray; 1895 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1896 ierr = 
PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1897 *norm = 0.0; 1898 v = amat->a; jj = amat->j; 1899 for (j=0; j<amat->nz; j++) { 1900 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1901 } 1902 v = bmat->a; jj = bmat->j; 1903 for (j=0; j<bmat->nz; j++) { 1904 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1905 } 1906 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1907 for (j=0; j<mat->cmap->N; j++) { 1908 if (tmp2[j] > *norm) *norm = tmp2[j]; 1909 } 1910 ierr = PetscFree(tmp);CHKERRQ(ierr); 1911 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1912 } else if (type == NORM_INFINITY) { /* max row norm */ 1913 PetscReal ntemp = 0.0; 1914 for (j=0; j<aij->A->rmap->n; j++) { 1915 v = amat->a + amat->i[j]; 1916 sum = 0.0; 1917 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1918 sum += PetscAbsScalar(*v); v++; 1919 } 1920 v = bmat->a + bmat->i[j]; 1921 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1922 sum += PetscAbsScalar(*v); v++; 1923 } 1924 if (sum > ntemp) ntemp = sum; 1925 } 1926 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1927 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1928 } 1929 PetscFunctionReturn(0); 1930 } 1931 1932 #undef __FUNCT__ 1933 #define __FUNCT__ "MatTranspose_MPIAIJ" 1934 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1935 { 1936 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1937 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1938 PetscErrorCode ierr; 1939 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1940 PetscInt cstart = A->cmap->rstart,ncol; 1941 Mat B; 1942 MatScalar *array; 1943 1944 PetscFunctionBegin; 1945 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1946 1947 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1948 ai = Aloc->i; aj = Aloc->j; 1949 bi = Bloc->i; bj = Bloc->j; 1950 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1951 PetscInt *d_nnz,*g_nnz,*o_nnz; 1952 PetscSFNode *oloc; 1953 PETSC_UNUSED PetscSF sf; 1954 1955 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1956 /* compute d_nnz for preallocation */ 1957 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1958 for (i=0; i<ai[ma]; i++) { 1959 d_nnz[aj[i]]++; 1960 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1961 } 1962 /* compute local off-diagonal contributions */ 1963 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1964 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1965 /* map those to global */ 1966 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1967 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1968 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1969 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1970 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1971 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1972 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1973 1974 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1975 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1976 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1977 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1978 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1979 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1980 } else { 1981 B = *matout; 1982 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1983 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1984 } 1985 1986 /* copy over the A part */ 1987 array = Aloc->a; 1988 row = A->rmap->rstart; 1989 for (i=0; i<ma; i++) { 1990 ncol = ai[i+1]-ai[i]; 1991 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1992 row++; 1993 array += ncol; aj += ncol; 1994 } 1995 aj = Aloc->j; 1996 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1997 1998 /* copy over the B part */ 1999 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2000 array = Bloc->a; 2001 row = A->rmap->rstart; 2002 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2003 cols_tmp = cols; 2004 for (i=0; i<mb; i++) { 2005 ncol = bi[i+1]-bi[i]; 2006 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2007 row++; 2008 array += ncol; cols_tmp += ncol; 2009 } 2010 ierr = PetscFree(cols);CHKERRQ(ierr); 2011 2012 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2013 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2014 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2015 *matout = B; 2016 } else { 2017 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2018 } 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2024 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2025 { 2026 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2027 Mat a = aij->A,b = aij->B; 2028 PetscErrorCode ierr; 2029 PetscInt s1,s2,s3; 2030 2031 PetscFunctionBegin; 2032 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2033 if (rr) { 2034 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2035 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2036 /* Overlap communication with computation. 
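         The scatter that brings the right-scaling vector rr into the ghosted vector
         aij->lvec is only started here; the left scaling of the off-diagonal block and
         the scaling of the diagonal block proceed while those messages are in flight,
         and the scatter is completed immediately before aij->lvec is needed to
         right-scale the off-diagonal block below.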
*/ 2037 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2038 } 2039 if (ll) { 2040 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2041 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2042 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2043 } 2044 /* scale the diagonal block */ 2045 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2046 2047 if (rr) { 2048 /* Do a scatter end and then right scale the off-diagonal block */ 2049 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2050 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2051 } 2052 PetscFunctionReturn(0); 2053 } 2054 2055 #undef __FUNCT__ 2056 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2057 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2058 { 2059 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2060 PetscErrorCode ierr; 2061 2062 PetscFunctionBegin; 2063 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2064 PetscFunctionReturn(0); 2065 } 2066 2067 #undef __FUNCT__ 2068 #define __FUNCT__ "MatEqual_MPIAIJ" 2069 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2070 { 2071 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2072 Mat a,b,c,d; 2073 PetscBool flg; 2074 PetscErrorCode ierr; 2075 2076 PetscFunctionBegin; 2077 a = matA->A; b = matA->B; 2078 c = matB->A; d = matB->B; 2079 2080 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2081 if (flg) { 2082 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2083 } 2084 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2085 PetscFunctionReturn(0); 2086 } 2087 2088 #undef __FUNCT__ 2089 #define __FUNCT__ "MatCopy_MPIAIJ" 2090 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2091 { 2092 PetscErrorCode ierr; 2093 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2094 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2095 2096 PetscFunctionBegin; 2097 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2098 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2099 /* because of the column compression in the off-processor part of the matrix a->B, 2100 the number of columns in a->B and b->B may be different, hence we cannot call 2101 the MatCopy() directly on the two parts. If need be, we can provide a more 2102 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2103 then copying the submatrices */ 2104 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2105 } else { 2106 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2107 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2108 } 2109 PetscFunctionReturn(0); 2110 } 2111 2112 #undef __FUNCT__ 2113 #define __FUNCT__ "MatSetUp_MPIAIJ" 2114 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2115 { 2116 PetscErrorCode ierr; 2117 2118 PetscFunctionBegin; 2119 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2120 PetscFunctionReturn(0); 2121 } 2122 2123 /* 2124 Computes the number of nonzeros per row needed for preallocation when X and Y 2125 have different nonzero structure. 
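      For each row the two sorted column lists are walked simultaneously (a standard
      merge), with the local column indices of X and Y translated to global indices
      through xltog and yltog so that entries from the two matrices can be compared;
      every distinct global column contributes one to nnz[i], and a column present in
      both X and Y is counted only once. For example, a row of X with global columns
      {1,4,7} and a row of Y with global columns {2,4} give nnz[i] = 4, the size of
      the union {1,2,4,7}.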
2126 */ 2127 #undef __FUNCT__ 2128 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2129 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2130 { 2131 PetscInt i,j,k,nzx,nzy; 2132 2133 PetscFunctionBegin; 2134 /* Set the number of nonzeros in the new matrix */ 2135 for (i=0; i<m; i++) { 2136 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2137 nzx = xi[i+1] - xi[i]; 2138 nzy = yi[i+1] - yi[i]; 2139 nnz[i] = 0; 2140 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2141 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2142 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2143 nnz[i]++; 2144 } 2145 for (; k<nzy; k++) nnz[i]++; 2146 } 2147 PetscFunctionReturn(0); 2148 } 2149 2150 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2153 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2154 { 2155 PetscErrorCode ierr; 2156 PetscInt m = Y->rmap->N; 2157 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2158 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2159 2160 PetscFunctionBegin; 2161 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2162 PetscFunctionReturn(0); 2163 } 2164 2165 #undef __FUNCT__ 2166 #define __FUNCT__ "MatAXPY_MPIAIJ" 2167 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2168 { 2169 PetscErrorCode ierr; 2170 PetscInt i; 2171 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2172 PetscBLASInt bnz,one=1; 2173 Mat_SeqAIJ *x,*y; 2174 2175 PetscFunctionBegin; 2176 if (str == SAME_NONZERO_PATTERN) { 2177 PetscScalar alpha = a; 2178 x = (Mat_SeqAIJ*)xx->A->data; 2179 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2180 y = (Mat_SeqAIJ*)yy->A->data; 2181 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2182 x = (Mat_SeqAIJ*)xx->B->data; 2183 y = (Mat_SeqAIJ*)yy->B->data; 2184 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2185 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2186 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2187 } else if (str == SUBSET_NONZERO_PATTERN) { 2188 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2189 2190 x = (Mat_SeqAIJ*)xx->B->data; 2191 y = (Mat_SeqAIJ*)yy->B->data; 2192 if (y->xtoy && y->XtoY != xx->B) { 2193 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2194 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2195 } 2196 if (!y->xtoy) { /* get xtoy */ 2197 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2198 y->XtoY = xx->B; 2199 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2200 } 2201 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2202 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2203 } else { 2204 Mat B; 2205 PetscInt *nnz_d,*nnz_o; 2206 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2207 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2208 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2209 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2210 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2211 
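    /*
       Neither SAME_NONZERO_PATTERN nor SUBSET_NONZERO_PATTERN applies here, so a brand
       new matrix B is built whose nonzero pattern is the union of the patterns of X and
       Y: the per-row counts nnz_d and nnz_o computed below preallocate B,
       MatAXPY_BasicWithPreallocation() fills it with Y + a*X, and MatHeaderReplace()
       then replaces the guts of Y with those of B so the caller's handle to Y is
       unchanged.
    */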
ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2212 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2213 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2214 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2215 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2216 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2217 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2218 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2219 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2220 } 2221 PetscFunctionReturn(0); 2222 } 2223 2224 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2225 2226 #undef __FUNCT__ 2227 #define __FUNCT__ "MatConjugate_MPIAIJ" 2228 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2229 { 2230 #if defined(PETSC_USE_COMPLEX) 2231 PetscErrorCode ierr; 2232 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2233 2234 PetscFunctionBegin; 2235 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2236 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2237 #else 2238 PetscFunctionBegin; 2239 #endif 2240 PetscFunctionReturn(0); 2241 } 2242 2243 #undef __FUNCT__ 2244 #define __FUNCT__ "MatRealPart_MPIAIJ" 2245 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2246 { 2247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2248 PetscErrorCode ierr; 2249 2250 PetscFunctionBegin; 2251 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2252 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2258 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2259 { 2260 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2265 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2266 PetscFunctionReturn(0); 2267 } 2268 2269 #if defined(PETSC_HAVE_PBGL) 2270 2271 #include <boost/parallel/mpi/bsp_process_group.hpp> 2272 #include <boost/graph/distributed/ilu_default_graph.hpp> 2273 #include <boost/graph/distributed/ilu_0_block.hpp> 2274 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2275 #include <boost/graph/distributed/petsc/interface.hpp> 2276 #include <boost/multi_array.hpp> 2277 #include <boost/parallel/distributed_property_map->hpp> 2278 2279 #undef __FUNCT__ 2280 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2281 /* 2282 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2283 */ 2284 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2285 { 2286 namespace petsc = boost::distributed::petsc; 2287 2288 namespace graph_dist = boost::graph::distributed; 2289 using boost::graph::distributed::ilu_default::process_group_type; 2290 using boost::graph::ilu_permuted; 2291 2292 PetscBool row_identity, col_identity; 2293 PetscContainer c; 2294 PetscInt m, n, M, N; 2295 PetscErrorCode ierr; 2296 2297 PetscFunctionBegin; 2298 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2299 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2300 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2301 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2302 2303 process_group_type pg; 2304 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2305 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, 
petsc::matrix_distribution(A, pg)); 2306 lgraph_type& level_graph = *lgraph_p; 2307 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2308 2309 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2310 ilu_permuted(level_graph); 2311 2312 /* put together the new matrix */ 2313 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2314 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2315 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2316 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2317 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2318 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2319 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2320 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2321 2322 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2323 ierr = PetscContainerSetPointer(c, lgraph_p); 2324 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2325 ierr = PetscContainerDestroy(&c); 2326 PetscFunctionReturn(0); 2327 } 2328 2329 #undef __FUNCT__ 2330 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2331 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2332 { 2333 PetscFunctionBegin; 2334 PetscFunctionReturn(0); 2335 } 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "MatSolve_MPIAIJ" 2339 /* 2340 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2341 */ 2342 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2343 { 2344 namespace graph_dist = boost::graph::distributed; 2345 2346 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2347 lgraph_type *lgraph_p; 2348 PetscContainer c; 2349 PetscErrorCode ierr; 2350 2351 PetscFunctionBegin; 2352 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2353 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2354 ierr = VecCopy(b, x);CHKERRQ(ierr); 2355 2356 PetscScalar *array_x; 2357 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2358 PetscInt sx; 2359 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2360 2361 PetscScalar *array_b; 2362 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2363 PetscInt sb; 2364 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2365 2366 lgraph_type& level_graph = *lgraph_p; 2367 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2368 2369 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2370 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2371 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2372 2373 typedef boost::iterator_property_map<array_ref_type::iterator, 2374 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2375 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2376 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2377 2378 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2379 PetscFunctionReturn(0); 2380 } 2381 #endif 2382 2383 2384 #undef __FUNCT__ 2385 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2386 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2387 { 2388 PetscMPIInt rank,size; 2389 MPI_Comm comm; 2390 PetscErrorCode ierr; 2391 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2392 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2393 PetscInt *rowrange = 
mat->rmap->range; 2394 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2395 Mat A = aij->A,B=aij->B,C=*matredundant; 2396 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2397 PetscScalar *sbuf_a; 2398 PetscInt nzlocal=a->nz+b->nz; 2399 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2400 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2401 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2402 MatScalar *aworkA,*aworkB; 2403 PetscScalar *vals; 2404 PetscMPIInt tag1,tag2,tag3,imdex; 2405 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2406 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2407 MPI_Status recv_status,*send_status; 2408 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2409 PetscInt **rbuf_j=NULL; 2410 PetscScalar **rbuf_a=NULL; 2411 Mat_Redundant *redund =NULL; 2412 2413 PetscFunctionBegin; 2414 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2415 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2416 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2417 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2418 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2419 2420 if (reuse == MAT_REUSE_MATRIX) { 2421 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2422 if (subsize == 1) { 2423 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2424 redund = c->redundant; 2425 } else { 2426 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2427 redund = c->redundant; 2428 } 2429 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2430 2431 nsends = redund->nsends; 2432 nrecvs = redund->nrecvs; 2433 send_rank = redund->send_rank; 2434 recv_rank = redund->recv_rank; 2435 sbuf_nz = redund->sbuf_nz; 2436 rbuf_nz = redund->rbuf_nz; 2437 sbuf_j = redund->sbuf_j; 2438 sbuf_a = redund->sbuf_a; 2439 rbuf_j = redund->rbuf_j; 2440 rbuf_a = redund->rbuf_a; 2441 } 2442 2443 if (reuse == MAT_INITIAL_MATRIX) { 2444 PetscInt nleftover,np_subcomm; 2445 2446 /* get the destination processors' id send_rank, nsends and nrecvs */ 2447 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2448 2449 np_subcomm = size/nsubcomm; 2450 nleftover = size - nsubcomm*np_subcomm; 2451 2452 /* block of codes below is specific for INTERLACED */ 2453 /* ------------------------------------------------*/ 2454 nsends = 0; nrecvs = 0; 2455 for (i=0; i<size; i++) { 2456 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2457 send_rank[nsends++] = i; 2458 recv_rank[nrecvs++] = i; 2459 } 2460 } 2461 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2462 i = size-nleftover-1; 2463 j = 0; 2464 while (j < nsubcomm - nleftover) { 2465 send_rank[nsends++] = i; 2466 i--; j++; 2467 } 2468 } 2469 2470 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2471 for (i=0; i<nleftover; i++) { 2472 recv_rank[nrecvs++] = size-nleftover+i; 2473 } 2474 } 2475 /*----------------------------------------------*/ 2476 2477 /* allocate sbuf_j, sbuf_a */ 2478 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2479 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2480 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2481 /* 2482 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2483 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2484 */ 
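    /*
       Layout of the send buffers set up above: sbuf_j holds a row pointer array of
       length (local rows + 1) followed by the nzlocal global column indices of this
       process's rows, and sbuf_a holds the corresponding nzlocal values; hence the
       allocation of nzlocal + (rowrange[rank+1]-rowrange[rank]) + 2 integers (with one
       entry of slack) for sbuf_j.
    */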
2485 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2486 2487 /* copy mat's local entries into the buffers */ 2488 if (reuse == MAT_INITIAL_MATRIX) { 2489 rownz_max = 0; 2490 rptr = sbuf_j; 2491 cols = sbuf_j + rend-rstart + 1; 2492 vals = sbuf_a; 2493 rptr[0] = 0; 2494 for (i=0; i<rend-rstart; i++) { 2495 row = i + rstart; 2496 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2497 ncols = nzA + nzB; 2498 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2499 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2500 /* load the column indices for this row into cols */ 2501 lwrite = 0; 2502 for (l=0; l<nzB; l++) { 2503 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2504 vals[lwrite] = aworkB[l]; 2505 cols[lwrite++] = ctmp; 2506 } 2507 } 2508 for (l=0; l<nzA; l++) { 2509 vals[lwrite] = aworkA[l]; 2510 cols[lwrite++] = cstart + cworkA[l]; 2511 } 2512 for (l=0; l<nzB; l++) { 2513 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2514 vals[lwrite] = aworkB[l]; 2515 cols[lwrite++] = ctmp; 2516 } 2517 } 2518 vals += ncols; 2519 cols += ncols; 2520 rptr[i+1] = rptr[i] + ncols; 2521 if (rownz_max < ncols) rownz_max = ncols; 2522 } 2523 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2524 } else { /* only copy matrix values into sbuf_a */ 2525 rptr = sbuf_j; 2526 vals = sbuf_a; 2527 rptr[0] = 0; 2528 for (i=0; i<rend-rstart; i++) { 2529 row = i + rstart; 2530 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2531 ncols = nzA + nzB; 2532 cworkB = b->j + b->i[i]; 2533 aworkA = a->a + a->i[i]; 2534 aworkB = b->a + b->i[i]; 2535 lwrite = 0; 2536 for (l=0; l<nzB; l++) { 2537 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2538 } 2539 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2540 for (l=0; l<nzB; l++) { 2541 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2542 } 2543 vals += ncols; 2544 rptr[i+1] = rptr[i] + ncols; 2545 } 2546 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2547 2548 /* send nzlocal to others, and recv other's nzlocal */ 2549 /*--------------------------------------------------*/ 2550 if (reuse == MAT_INITIAL_MATRIX) { 2551 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2552 2553 s_waits2 = s_waits3 + nsends; 2554 s_waits1 = s_waits2 + nsends; 2555 r_waits1 = s_waits1 + nsends; 2556 r_waits2 = r_waits1 + nrecvs; 2557 r_waits3 = r_waits2 + nrecvs; 2558 } else { 2559 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2560 2561 r_waits3 = s_waits3 + nsends; 2562 } 2563 2564 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2565 if (reuse == MAT_INITIAL_MATRIX) { 2566 /* get new tags to keep the communication clean */ 2567 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2568 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2569 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2570 2571 /* post receives of other's nzlocal */ 2572 for (i=0; i<nrecvs; i++) { 2573 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2574 } 2575 /* send nzlocal to others */ 2576 for (i=0; i<nsends; i++) { 2577 sbuf_nz[i] = nzlocal; 2578 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2579 } 2580 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2581 count = nrecvs; 2582 while (count) { 2583 ierr = 
MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2584 2585 recv_rank[imdex] = recv_status.MPI_SOURCE; 2586 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2587 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2588 2589 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2590 2591 rbuf_nz[imdex] += i + 2; 2592 2593 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2594 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2595 count--; 2596 } 2597 /* wait on sends of nzlocal */ 2598 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2599 /* send mat->i,j to others, and recv from other's */ 2600 /*------------------------------------------------*/ 2601 for (i=0; i<nsends; i++) { 2602 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2603 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2604 } 2605 /* wait on receives of mat->i,j */ 2606 /*------------------------------*/ 2607 count = nrecvs; 2608 while (count) { 2609 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2610 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2611 count--; 2612 } 2613 /* wait on sends of mat->i,j */ 2614 /*---------------------------*/ 2615 if (nsends) { 2616 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2617 } 2618 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2619 2620 /* post receives, send and receive mat->a */ 2621 /*----------------------------------------*/ 2622 for (imdex=0; imdex<nrecvs; imdex++) { 2623 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2624 } 2625 for (i=0; i<nsends; i++) { 2626 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2627 } 2628 count = nrecvs; 2629 while (count) { 2630 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2631 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2632 count--; 2633 } 2634 if (nsends) { 2635 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2636 } 2637 2638 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2639 2640 /* create redundant matrix */ 2641 /*-------------------------*/ 2642 if (reuse == MAT_INITIAL_MATRIX) { 2643 const PetscInt *range; 2644 PetscInt rstart_sub,rend_sub,mloc_sub; 2645 2646 /* compute rownz_max for preallocation */ 2647 for (imdex=0; imdex<nrecvs; imdex++) { 2648 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2649 rptr = rbuf_j[imdex]; 2650 for (i=0; i<j; i++) { 2651 ncols = rptr[i+1] - rptr[i]; 2652 if (rownz_max < ncols) rownz_max = ncols; 2653 } 2654 } 2655 2656 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2657 2658 /* get local size of redundant matrix 2659 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! 
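         With an interlaced subcommunicator, subrank i of every subcommunicator
         corresponds to the original ranks nsubcomm*i, ..., nsubcomm*(i+1)-1, so the
         local row range of the redundant matrix on subrank i is taken from the
         ownership ranges of those original ranks: rstart_sub is range[nsubcomm*subrank]
         and rend_sub is range[nsubcomm*(subrank+1)] (or the global number of rows for
         the last process of the subcommunicator).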
*/ 2660 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2661 rstart_sub = range[nsubcomm*subrank]; 2662 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2663 rend_sub = range[nsubcomm*(subrank+1)]; 2664 } else { 2665 rend_sub = mat->rmap->N; 2666 } 2667 mloc_sub = rend_sub - rstart_sub; 2668 2669 if (M == N) { 2670 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2671 } else { /* non-square matrix */ 2672 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2673 } 2674 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2675 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2676 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2677 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2678 } else { 2679 C = *matredundant; 2680 } 2681 2682 /* insert local matrix entries */ 2683 rptr = sbuf_j; 2684 cols = sbuf_j + rend-rstart + 1; 2685 vals = sbuf_a; 2686 for (i=0; i<rend-rstart; i++) { 2687 row = i + rstart; 2688 ncols = rptr[i+1] - rptr[i]; 2689 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2690 vals += ncols; 2691 cols += ncols; 2692 } 2693 /* insert received matrix entries */ 2694 for (imdex=0; imdex<nrecvs; imdex++) { 2695 rstart = rowrange[recv_rank[imdex]]; 2696 rend = rowrange[recv_rank[imdex]+1]; 2697 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2698 rptr = rbuf_j[imdex]; 2699 cols = rbuf_j[imdex] + rend-rstart + 1; 2700 vals = rbuf_a[imdex]; 2701 for (i=0; i<rend-rstart; i++) { 2702 row = i + rstart; 2703 ncols = rptr[i+1] - rptr[i]; 2704 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2705 vals += ncols; 2706 cols += ncols; 2707 } 2708 } 2709 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2710 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2711 2712 if (reuse == MAT_INITIAL_MATRIX) { 2713 *matredundant = C; 2714 2715 /* create a supporting struct and attach it to C for reuse */ 2716 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2717 if (subsize == 1) { 2718 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2719 c->redundant = redund; 2720 } else { 2721 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2722 c->redundant = redund; 2723 } 2724 2725 redund->nzlocal = nzlocal; 2726 redund->nsends = nsends; 2727 redund->nrecvs = nrecvs; 2728 redund->send_rank = send_rank; 2729 redund->recv_rank = recv_rank; 2730 redund->sbuf_nz = sbuf_nz; 2731 redund->rbuf_nz = rbuf_nz; 2732 redund->sbuf_j = sbuf_j; 2733 redund->sbuf_a = sbuf_a; 2734 redund->rbuf_j = rbuf_j; 2735 redund->rbuf_a = rbuf_a; 2736 redund->psubcomm = NULL; 2737 } 2738 PetscFunctionReturn(0); 2739 } 2740 2741 #undef __FUNCT__ 2742 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2743 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2744 { 2745 PetscErrorCode ierr; 2746 MPI_Comm comm; 2747 PetscMPIInt size,subsize; 2748 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2749 Mat_Redundant *redund=NULL; 2750 PetscSubcomm psubcomm=NULL; 2751 MPI_Comm subcomm_in=subcomm; 2752 Mat *matseq; 2753 IS isrow,iscol; 2754 2755 PetscFunctionBegin; 2756 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2757 if (reuse == MAT_INITIAL_MATRIX) { 2758 /* create psubcomm, then get subcomm */ 2759 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2760 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2761 if (nsubcomm < 1 || nsubcomm 
> size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2762 2763 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2764 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2765 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2766 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2767 subcomm = psubcomm->comm; 2768 } else { /* retrieve psubcomm and subcomm */ 2769 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2770 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2771 if (subsize == 1) { 2772 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2773 redund = c->redundant; 2774 } else { 2775 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2776 redund = c->redundant; 2777 } 2778 psubcomm = redund->psubcomm; 2779 } 2780 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2781 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2782 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */ 2783 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2784 if (subsize == 1) { 2785 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2786 c->redundant->psubcomm = psubcomm; 2787 } else { 2788 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2789 c->redundant->psubcomm = psubcomm ; 2790 } 2791 } 2792 PetscFunctionReturn(0); 2793 } 2794 } 2795 2796 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2797 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2798 if (reuse == MAT_INITIAL_MATRIX) { 2799 /* create a local sequential matrix matseq[0] */ 2800 mloc_sub = PETSC_DECIDE; 2801 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2802 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2803 rstart = rend - mloc_sub; 2804 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2805 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2806 } else { /* reuse == MAT_REUSE_MATRIX */ 2807 if (subsize == 1) { 2808 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2809 redund = c->redundant; 2810 } else { 2811 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2812 redund = c->redundant; 2813 } 2814 2815 isrow = redund->isrow; 2816 iscol = redund->iscol; 2817 matseq = redund->matseq; 2818 } 2819 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2820 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2821 2822 if (reuse == MAT_INITIAL_MATRIX) { 2823 /* create a supporting struct and attach it to C for reuse */ 2824 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2825 if (subsize == 1) { 2826 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2827 c->redundant = redund; 2828 } else { 2829 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2830 c->redundant = redund; 2831 } 2832 redund->isrow = isrow; 2833 redund->iscol = iscol; 2834 redund->matseq = matseq; 2835 redund->psubcomm = psubcomm; 2836 } 2837 PetscFunctionReturn(0); 2838 } 2839 2840 #undef __FUNCT__ 2841 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2842 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2843 { 2844 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2845 PetscErrorCode ierr; 2846 PetscInt i,*idxb = 0; 2847 PetscScalar *va,*vb; 2848 Vec vtmp; 2849 2850 
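  /*
     The row-wise maximum absolute value is computed in two passes: first over the
     diagonal block a->A directly into v (with any returned column indices shifted by
     A->cmap->rstart to make them global), then over the off-diagonal block a->B into
     a temporary sequential vector; the two results are merged by keeping the entry of
     larger magnitude and mapping indices from the B block through a->garray to global
     column numbers. A rough usage sketch from application code (assuming a parallel
     AIJ matrix A has already been assembled):

        Vec rmax;
        ierr = MatGetVecs(A,NULL,&rmax);CHKERRQ(ierr);     (vector with the row layout of A)
        ierr = MatGetRowMaxAbs(A,rmax,NULL);CHKERRQ(ierr);
        ierr = VecDestroy(&rmax);CHKERRQ(ierr);
  */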
PetscFunctionBegin; 2851 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2852 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2853 if (idx) { 2854 for (i=0; i<A->rmap->n; i++) { 2855 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2856 } 2857 } 2858 2859 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2860 if (idx) { 2861 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2862 } 2863 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2864 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2865 2866 for (i=0; i<A->rmap->n; i++) { 2867 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2868 va[i] = vb[i]; 2869 if (idx) idx[i] = a->garray[idxb[i]]; 2870 } 2871 } 2872 2873 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2874 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2875 ierr = PetscFree(idxb);CHKERRQ(ierr); 2876 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2877 PetscFunctionReturn(0); 2878 } 2879 2880 #undef __FUNCT__ 2881 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2882 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2883 { 2884 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2885 PetscErrorCode ierr; 2886 PetscInt i,*idxb = 0; 2887 PetscScalar *va,*vb; 2888 Vec vtmp; 2889 2890 PetscFunctionBegin; 2891 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2892 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2893 if (idx) { 2894 for (i=0; i<A->rmap->n; i++) { 2895 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2896 } 2897 } 2898 2899 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2900 if (idx) { 2901 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2902 } 2903 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2904 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2905 2906 for (i=0; i<A->rmap->n; i++) { 2907 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2908 va[i] = vb[i]; 2909 if (idx) idx[i] = a->garray[idxb[i]]; 2910 } 2911 } 2912 2913 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2914 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2915 ierr = PetscFree(idxb);CHKERRQ(ierr); 2916 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2917 PetscFunctionReturn(0); 2918 } 2919 2920 #undef __FUNCT__ 2921 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2922 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2923 { 2924 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2925 PetscInt n = A->rmap->n; 2926 PetscInt cstart = A->cmap->rstart; 2927 PetscInt *cmap = mat->garray; 2928 PetscInt *diagIdx, *offdiagIdx; 2929 Vec diagV, offdiagV; 2930 PetscScalar *a, *diagA, *offdiagA; 2931 PetscInt r; 2932 PetscErrorCode ierr; 2933 2934 PetscFunctionBegin; 2935 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2936 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2937 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2938 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2939 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2940 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2941 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2942 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2943 for (r = 0; r < n; ++r) { 2944 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2945 a[r] = diagA[r]; 2946 idx[r] = cstart + diagIdx[r]; 2947 } else { 2948 a[r] = offdiagA[r]; 2949 idx[r] = cmap[offdiagIdx[r]]; 2950 } 2951 } 2952 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2953 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2954 ierr = VecRestoreArray(offdiagV,
&offdiagA);CHKERRQ(ierr); 2955 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2956 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2957 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2958 PetscFunctionReturn(0); 2959 } 2960 2961 #undef __FUNCT__ 2962 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2963 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2964 { 2965 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2966 PetscInt n = A->rmap->n; 2967 PetscInt cstart = A->cmap->rstart; 2968 PetscInt *cmap = mat->garray; 2969 PetscInt *diagIdx, *offdiagIdx; 2970 Vec diagV, offdiagV; 2971 PetscScalar *a, *diagA, *offdiagA; 2972 PetscInt r; 2973 PetscErrorCode ierr; 2974 2975 PetscFunctionBegin; 2976 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2977 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2978 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2979 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2980 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2981 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2982 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2983 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2984 for (r = 0; r < n; ++r) { 2985 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2986 a[r] = diagA[r]; 2987 idx[r] = cstart + diagIdx[r]; 2988 } else { 2989 a[r] = offdiagA[r]; 2990 idx[r] = cmap[offdiagIdx[r]]; 2991 } 2992 } 2993 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2994 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2995 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2996 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2997 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2998 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2999 PetscFunctionReturn(0); 3000 } 3001 3002 #undef __FUNCT__ 3003 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3004 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3005 { 3006 PetscErrorCode ierr; 3007 Mat *dummy; 3008 3009 PetscFunctionBegin; 3010 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3011 *newmat = *dummy; 3012 ierr = PetscFree(dummy);CHKERRQ(ierr); 3013 PetscFunctionReturn(0); 3014 } 3015 3016 #undef __FUNCT__ 3017 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3018 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3019 { 3020 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3021 PetscErrorCode ierr; 3022 3023 PetscFunctionBegin; 3024 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3025 PetscFunctionReturn(0); 3026 } 3027 3028 #undef __FUNCT__ 3029 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3030 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3031 { 3032 PetscErrorCode ierr; 3033 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3034 3035 PetscFunctionBegin; 3036 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3037 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3038 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3039 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3040 PetscFunctionReturn(0); 3041 } 3042 3043 /* -------------------------------------------------------------------*/ 3044 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3045 MatGetRow_MPIAIJ, 3046 MatRestoreRow_MPIAIJ, 3047 MatMult_MPIAIJ, 3048 /* 4*/ MatMultAdd_MPIAIJ, 3049 MatMultTranspose_MPIAIJ, 3050 MatMultTransposeAdd_MPIAIJ, 3051 #if defined(PETSC_HAVE_PBGL) 3052 MatSolve_MPIAIJ, 3053 #else 3054 0, 3055 #endif 3056 0, 3057 
0, 3058 /*10*/ 0, 3059 0, 3060 0, 3061 MatSOR_MPIAIJ, 3062 MatTranspose_MPIAIJ, 3063 /*15*/ MatGetInfo_MPIAIJ, 3064 MatEqual_MPIAIJ, 3065 MatGetDiagonal_MPIAIJ, 3066 MatDiagonalScale_MPIAIJ, 3067 MatNorm_MPIAIJ, 3068 /*20*/ MatAssemblyBegin_MPIAIJ, 3069 MatAssemblyEnd_MPIAIJ, 3070 MatSetOption_MPIAIJ, 3071 MatZeroEntries_MPIAIJ, 3072 /*24*/ MatZeroRows_MPIAIJ, 3073 0, 3074 #if defined(PETSC_HAVE_PBGL) 3075 0, 3076 #else 3077 0, 3078 #endif 3079 0, 3080 0, 3081 /*29*/ MatSetUp_MPIAIJ, 3082 #if defined(PETSC_HAVE_PBGL) 3083 0, 3084 #else 3085 0, 3086 #endif 3087 0, 3088 0, 3089 0, 3090 /*34*/ MatDuplicate_MPIAIJ, 3091 0, 3092 0, 3093 0, 3094 0, 3095 /*39*/ MatAXPY_MPIAIJ, 3096 MatGetSubMatrices_MPIAIJ, 3097 MatIncreaseOverlap_MPIAIJ, 3098 MatGetValues_MPIAIJ, 3099 MatCopy_MPIAIJ, 3100 /*44*/ MatGetRowMax_MPIAIJ, 3101 MatScale_MPIAIJ, 3102 0, 3103 0, 3104 MatZeroRowsColumns_MPIAIJ, 3105 /*49*/ MatSetRandom_MPIAIJ, 3106 0, 3107 0, 3108 0, 3109 0, 3110 /*54*/ MatFDColoringCreate_MPIXAIJ, 3111 0, 3112 MatSetUnfactored_MPIAIJ, 3113 MatPermute_MPIAIJ, 3114 0, 3115 /*59*/ MatGetSubMatrix_MPIAIJ, 3116 MatDestroy_MPIAIJ, 3117 MatView_MPIAIJ, 3118 0, 3119 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3120 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3121 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3122 0, 3123 0, 3124 0, 3125 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3126 MatGetRowMinAbs_MPIAIJ, 3127 0, 3128 MatSetColoring_MPIAIJ, 3129 0, 3130 MatSetValuesAdifor_MPIAIJ, 3131 /*75*/ MatFDColoringApply_AIJ, 3132 0, 3133 0, 3134 0, 3135 MatFindZeroDiagonals_MPIAIJ, 3136 /*80*/ 0, 3137 0, 3138 0, 3139 /*83*/ MatLoad_MPIAIJ, 3140 0, 3141 0, 3142 0, 3143 0, 3144 0, 3145 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3146 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3147 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3148 MatPtAP_MPIAIJ_MPIAIJ, 3149 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3150 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3151 0, 3152 0, 3153 0, 3154 0, 3155 /*99*/ 0, 3156 0, 3157 0, 3158 MatConjugate_MPIAIJ, 3159 0, 3160 /*104*/MatSetValuesRow_MPIAIJ, 3161 MatRealPart_MPIAIJ, 3162 MatImaginaryPart_MPIAIJ, 3163 0, 3164 0, 3165 /*109*/0, 3166 MatGetRedundantMatrix_MPIAIJ, 3167 MatGetRowMin_MPIAIJ, 3168 0, 3169 0, 3170 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3171 0, 3172 0, 3173 0, 3174 0, 3175 /*119*/0, 3176 0, 3177 0, 3178 0, 3179 MatGetMultiProcBlock_MPIAIJ, 3180 /*124*/MatFindNonzeroRows_MPIAIJ, 3181 MatGetColumnNorms_MPIAIJ, 3182 MatInvertBlockDiagonal_MPIAIJ, 3183 0, 3184 MatGetSubMatricesParallel_MPIAIJ, 3185 /*129*/0, 3186 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3187 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3188 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3189 0, 3190 /*134*/0, 3191 0, 3192 0, 3193 0, 3194 0, 3195 /*139*/0, 3196 0, 3197 0, 3198 MatFDColoringSetUp_MPIXAIJ 3199 }; 3200 3201 /* ----------------------------------------------------------------------------------------*/ 3202 3203 #undef __FUNCT__ 3204 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3205 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3206 { 3207 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3208 PetscErrorCode ierr; 3209 3210 PetscFunctionBegin; 3211 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3212 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3213 PetscFunctionReturn(0); 3214 } 3215 3216 #undef __FUNCT__ 3217 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3218 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3219 { 3220 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3221 PetscErrorCode ierr; 3222 3223 PetscFunctionBegin; 3224 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3225 ierr = 
MatRetrieveValues(aij->B);CHKERRQ(ierr); 3226 PetscFunctionReturn(0); 3227 } 3228 3229 #undef __FUNCT__ 3230 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3231 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3232 { 3233 Mat_MPIAIJ *b; 3234 PetscErrorCode ierr; 3235 3236 PetscFunctionBegin; 3237 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3238 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3239 b = (Mat_MPIAIJ*)B->data; 3240 3241 if (!B->preallocated) { 3242 /* Explicitly create 2 MATSEQAIJ matrices. */ 3243 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3244 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3245 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3246 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3247 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3248 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3249 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3250 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3251 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3252 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3253 } 3254 3255 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3256 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3257 B->preallocated = PETSC_TRUE; 3258 PetscFunctionReturn(0); 3259 } 3260 3261 #undef __FUNCT__ 3262 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3263 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3264 { 3265 Mat mat; 3266 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3267 PetscErrorCode ierr; 3268 3269 PetscFunctionBegin; 3270 *newmat = 0; 3271 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3272 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3273 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3274 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3275 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3276 a = (Mat_MPIAIJ*)mat->data; 3277 3278 mat->factortype = matin->factortype; 3279 mat->assembled = PETSC_TRUE; 3280 mat->insertmode = NOT_SET_VALUES; 3281 mat->preallocated = PETSC_TRUE; 3282 3283 a->size = oldmat->size; 3284 a->rank = oldmat->rank; 3285 a->donotstash = oldmat->donotstash; 3286 a->roworiented = oldmat->roworiented; 3287 a->rowindices = 0; 3288 a->rowvalues = 0; 3289 a->getrowactive = PETSC_FALSE; 3290 3291 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3292 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3293 3294 if (oldmat->colmap) { 3295 #if defined(PETSC_USE_CTABLE) 3296 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3297 #else 3298 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3299 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3300 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3301 #endif 3302 } else a->colmap = 0; 3303 if (oldmat->garray) { 3304 PetscInt len; 3305 len = oldmat->B->cmap->n; 3306 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3307 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3308 if (len) { ierr = 
PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3309 } else a->garray = 0; 3310 3311 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3312 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3313 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3314 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3315 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3316 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3317 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3318 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3319 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3320 *newmat = mat; 3321 PetscFunctionReturn(0); 3322 } 3323 3324 3325 3326 #undef __FUNCT__ 3327 #define __FUNCT__ "MatLoad_MPIAIJ" 3328 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3329 { 3330 PetscScalar *vals,*svals; 3331 MPI_Comm comm; 3332 PetscErrorCode ierr; 3333 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3334 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3335 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3336 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3337 PetscInt cend,cstart,n,*rowners,sizesset=1; 3338 int fd; 3339 PetscInt bs = 1; 3340 3341 PetscFunctionBegin; 3342 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3343 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3344 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3345 if (!rank) { 3346 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3347 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3348 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3349 } 3350 3351 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3352 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3353 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3354 3355 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3356 3357 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3358 M = header[1]; N = header[2]; 3359 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3360 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3361 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3362 3363 /* If global sizes are set, check if they are consistent with that given in the file */ 3364 if (sizesset) { 3365 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3366 } 3367 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3368 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3369 3370 /* determine ownership of all (block) rows */ 3371 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3372 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3373 else m = newMat->rmap->n; /* Set by user */ 3374 3375 
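  /* Gather every process's local row count into rowners[1..size] and turn it into a prefix sum, so that rows rowners[p] .. rowners[p+1]-1 are owned by process p */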
ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3376 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3377 3378 /* First process needs enough room for process with most rows */ 3379 if (!rank) { 3380 mmax = rowners[1]; 3381 for (i=2; i<=size; i++) { 3382 mmax = PetscMax(mmax, rowners[i]); 3383 } 3384 } else mmax = -1; /* unused, but compilers complain */ 3385 3386 rowners[0] = 0; 3387 for (i=2; i<=size; i++) { 3388 rowners[i] += rowners[i-1]; 3389 } 3390 rstart = rowners[rank]; 3391 rend = rowners[rank+1]; 3392 3393 /* distribute row lengths to all processors */ 3394 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3395 if (!rank) { 3396 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3397 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3398 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3399 for (j=0; j<m; j++) { 3400 procsnz[0] += ourlens[j]; 3401 } 3402 for (i=1; i<size; i++) { 3403 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3404 /* calculate the number of nonzeros on each processor */ 3405 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3406 procsnz[i] += rowlengths[j]; 3407 } 3408 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3409 } 3410 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3411 } else { 3412 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3413 } 3414 3415 if (!rank) { 3416 /* determine max buffer needed and allocate it */ 3417 maxnz = 0; 3418 for (i=0; i<size; i++) { 3419 maxnz = PetscMax(maxnz,procsnz[i]); 3420 } 3421 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3422 3423 /* read in my part of the matrix column indices */ 3424 nz = procsnz[0]; 3425 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3426 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3427 3428 /* read in every one elses and ship off */ 3429 for (i=1; i<size; i++) { 3430 nz = procsnz[i]; 3431 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3432 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3433 } 3434 ierr = PetscFree(cols);CHKERRQ(ierr); 3435 } else { 3436 /* determine buffer space needed for message */ 3437 nz = 0; 3438 for (i=0; i<m; i++) { 3439 nz += ourlens[i]; 3440 } 3441 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3442 3443 /* receive message of column indices*/ 3444 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3445 } 3446 3447 /* determine column ownership if matrix is not square */ 3448 if (N != M) { 3449 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3450 else n = newMat->cmap->n; 3451 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3452 cstart = cend - n; 3453 } else { 3454 cstart = rstart; 3455 cend = rend; 3456 n = cend - cstart; 3457 } 3458 3459 /* loop over local rows, determining number of off diagonal entries */ 3460 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3461 jj = 0; 3462 for (i=0; i<m; i++) { 3463 for (j=0; j<ourlens[i]; j++) { 3464 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3465 jj++; 3466 } 3467 } 3468 3469 for (i=0; i<m; i++) { 3470 ourlens[i] -= offlens[i]; 3471 } 3472 if (!sizesset) { 3473 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3474 } 3475 3476 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3477 3478 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3479 3480 for (i=0; i<m; i++) { 3481 ourlens[i] += offlens[i]; 3482 } 3483 3484 if 
(!rank) { 3485 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3486 3487 /* read in my part of the matrix numerical values */ 3488 nz = procsnz[0]; 3489 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3490 3491 /* insert into matrix */ 3492 jj = rstart; 3493 smycols = mycols; 3494 svals = vals; 3495 for (i=0; i<m; i++) { 3496 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3497 smycols += ourlens[i]; 3498 svals += ourlens[i]; 3499 jj++; 3500 } 3501 3502 /* read in other processors and ship out */ 3503 for (i=1; i<size; i++) { 3504 nz = procsnz[i]; 3505 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3506 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3507 } 3508 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3509 } else { 3510 /* receive numeric values */ 3511 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3512 3513 /* receive message of values*/ 3514 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3515 3516 /* insert into matrix */ 3517 jj = rstart; 3518 smycols = mycols; 3519 svals = vals; 3520 for (i=0; i<m; i++) { 3521 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3522 smycols += ourlens[i]; 3523 svals += ourlens[i]; 3524 jj++; 3525 } 3526 } 3527 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3528 ierr = PetscFree(vals);CHKERRQ(ierr); 3529 ierr = PetscFree(mycols);CHKERRQ(ierr); 3530 ierr = PetscFree(rowners);CHKERRQ(ierr); 3531 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3532 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3533 PetscFunctionReturn(0); 3534 } 3535 3536 #undef __FUNCT__ 3537 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3538 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3539 { 3540 PetscErrorCode ierr; 3541 IS iscol_local; 3542 PetscInt csize; 3543 3544 PetscFunctionBegin; 3545 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3546 if (call == MAT_REUSE_MATRIX) { 3547 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3548 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3549 } else { 3550 PetscInt cbs; 3551 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3552 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3553 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3554 } 3555 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3556 if (call == MAT_INITIAL_MATRIX) { 3557 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3558 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3559 } 3560 PetscFunctionReturn(0); 3561 } 3562 3563 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3564 #undef __FUNCT__ 3565 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3566 /* 3567 Not great since it makes two copies of the submatrix, first an SeqAIJ 3568 in local and then by concatenating the local matrices the end result. 3569 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3570 3571 Note: This requires a sequential iscol with all indices. 
3572 */ 3573 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3574 { 3575 PetscErrorCode ierr; 3576 PetscMPIInt rank,size; 3577 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3578 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3579 PetscBool allcolumns, colflag; 3580 Mat M,Mreuse; 3581 MatScalar *vwork,*aa; 3582 MPI_Comm comm; 3583 Mat_SeqAIJ *aij; 3584 3585 PetscFunctionBegin; 3586 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3587 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3588 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3589 3590 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3591 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3592 if (colflag && ncol == mat->cmap->N) { 3593 allcolumns = PETSC_TRUE; 3594 } else { 3595 allcolumns = PETSC_FALSE; 3596 } 3597 if (call == MAT_REUSE_MATRIX) { 3598 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3599 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3600 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3601 } else { 3602 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3603 } 3604 3605 /* 3606 m - number of local rows 3607 n - number of columns (same on all processors) 3608 rstart - first row in new global matrix generated 3609 */ 3610 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3611 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3612 if (call == MAT_INITIAL_MATRIX) { 3613 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3614 ii = aij->i; 3615 jj = aij->j; 3616 3617 /* 3618 Determine the number of non-zeros in the diagonal and off-diagonal 3619 portions of the matrix in order to do correct preallocation 3620 */ 3621 3622 /* first get start and end of "diagonal" columns */ 3623 if (csize == PETSC_DECIDE) { 3624 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3625 if (mglobal == n) { /* square matrix */ 3626 nlocal = m; 3627 } else { 3628 nlocal = n/size + ((n % size) > rank); 3629 } 3630 } else { 3631 nlocal = csize; 3632 } 3633 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3634 rstart = rend - nlocal; 3635 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3636 3637 /* next, compute all the lengths */ 3638 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3639 olens = dlens + m; 3640 for (i=0; i<m; i++) { 3641 jend = ii[i+1] - ii[i]; 3642 olen = 0; 3643 dlen = 0; 3644 for (j=0; j<jend; j++) { 3645 if (*jj < rstart || *jj >= rend) olen++; 3646 else dlen++; 3647 jj++; 3648 } 3649 olens[i] = olen; 3650 dlens[i] = dlen; 3651 } 3652 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3653 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3654 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3655 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3656 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3657 ierr = PetscFree(dlens);CHKERRQ(ierr); 3658 } else { 3659 PetscInt ml,nl; 3660 3661 M = *newmat; 3662 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3663 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3664 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3665 /* 3666 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3667 rather than the slower MatSetValues(). 3668 */ 3669 M->was_assembled = PETSC_TRUE; 3670 M->assembled = PETSC_FALSE; 3671 } 3672 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3673 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3674 ii = aij->i; 3675 jj = aij->j; 3676 aa = aij->a; 3677 for (i=0; i<m; i++) { 3678 row = rstart + i; 3679 nz = ii[i+1] - ii[i]; 3680 cwork = jj; jj += nz; 3681 vwork = aa; aa += nz; 3682 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3683 } 3684 3685 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3686 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3687 *newmat = M; 3688 3689 /* save submatrix used in processor for next request */ 3690 if (call == MAT_INITIAL_MATRIX) { 3691 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3692 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3693 } 3694 PetscFunctionReturn(0); 3695 } 3696 3697 #undef __FUNCT__ 3698 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3699 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3700 { 3701 PetscInt m,cstart, cend,j,nnz,i,d; 3702 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3703 const PetscInt *JJ; 3704 PetscScalar *values; 3705 PetscErrorCode ierr; 3706 3707 PetscFunctionBegin; 3708 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3709 3710 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3711 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3712 m = B->rmap->n; 3713 cstart = B->cmap->rstart; 3714 cend = B->cmap->rend; 3715 rstart = B->rmap->rstart; 3716 3717 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3718 3719 #if defined(PETSC_USE_DEBUG) 3720 for (i=0; i<m; i++) { 3721 nnz = Ii[i+1]- Ii[i]; 3722 JJ = J + Ii[i]; 3723 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3724 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3725 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3726 } 3727 #endif 3728 3729 for (i=0; i<m; i++) { 3730 nnz = Ii[i+1]- Ii[i]; 3731 JJ = J + Ii[i]; 3732 nnz_max = PetscMax(nnz_max,nnz); 3733 d = 0; 3734 for (j=0; j<nnz; j++) { 3735 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3736 } 3737 d_nnz[i] = d; 3738 o_nnz[i] = nnz - d; 3739 } 3740 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3741 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3742 3743 if (v) values = (PetscScalar*)v; 3744 else { 3745 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3746 } 3747 3748 for (i=0; i<m; i++) { 3749 ii = i + rstart; 3750 nnz = Ii[i+1]- Ii[i]; 3751 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3752 } 3753 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3754 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3755 3756 if (!v) { 3757 ierr = PetscFree(values);CHKERRQ(ierr); 3758 } 3759 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3760 PetscFunctionReturn(0); 3761 } 3762 3763 #undef __FUNCT__ 3764 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3765 /*@ 3766 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3767 (the default parallel PETSc format). 3768 3769 Collective on MPI_Comm 3770 3771 Input Parameters: 3772 + B - the matrix 3773 . i - the indices into j for the start of each local row (starts with zero) 3774 . j - the column indices for each local row (starts with zero) 3775 - v - optional values in the matrix 3776 3777 Level: developer 3778 3779 Notes: 3780 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3781 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3782 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3783 3784 The i and j indices are 0 based, and the i indices are offsets into the local j array. 3785 3786 The format which is used for the sparse matrix input is equivalent to a 3787 row-major ordering, i.e., for the following matrix, the input data expected is 3788 as shown: 3789 3790 1 0 0 3791 2 0 3 P0 3792 ------- 3793 4 5 6 P1 3794 3795 Process0 [P0]: rows_owned=[0,1] 3796 i = {0,1,3} [size = nrow+1 = 2+1] 3797 j = {0,0,2} [size = nz = 3] 3798 v = {1,2,3} [size = nz = 3] 3799 3800 Process1 [P1]: rows_owned=[2] 3801 i = {0,3} [size = nrow+1 = 1+1] 3802 j = {0,1,2} [size = nz = 3] 3803 v = {4,5,6} [size = nz = 3] 3804 3805 .keywords: matrix, aij, compressed row, sparse, parallel 3806 3807 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3808 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3809 @*/ 3810 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3811 { 3812 PetscErrorCode ierr; 3813 3814 PetscFunctionBegin; 3815 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3816 PetscFunctionReturn(0); 3817 } 3818 3819 #undef __FUNCT__ 3820 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3821 /*@C 3822 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3823 (the default parallel PETSc format). For good matrix assembly performance 3824 the user should preallocate the matrix storage by setting the parameters 3825 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3826 performance can be increased by more than a factor of 50. 3827 3828 Collective on MPI_Comm 3829 3830 Input Parameters: 3831 + B - the matrix 3832 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3833 (same value is used for all local rows) 3834 . d_nnz - array containing the number of nonzeros in the various rows of the 3835 DIAGONAL portion of the local submatrix (possibly different for each row) 3836 or NULL, if d_nz is used to specify the nonzero structure. 3837 The size of this array is equal to the number of local rows, i.e. 'm'.
3838 For matrices that will be factored, you must leave room for (and set) 3839 the diagonal entry even if it is zero. 3840 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3841 submatrix (same value is used for all local rows). 3842 - o_nnz - array containing the number of nonzeros in the various rows of the 3843 OFF-DIAGONAL portion of the local submatrix (possibly different for 3844 each row) or NULL, if o_nz is used to specify the nonzero 3845 structure. The size of this array is equal to the number 3846 of local rows, i.e. 'm'. 3847 3848 If the *_nnz parameter is given then the *_nz parameter is ignored. 3849 3850 The AIJ format (also called the Yale sparse matrix format or 3851 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3852 storage. The stored row and column indices begin with zero. 3853 See Users-Manual: ch_mat for details. 3854 3855 The parallel matrix is partitioned such that the first m0 rows belong to 3856 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3857 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 3858 3859 The DIAGONAL portion of the local submatrix of a processor can be defined 3860 as the submatrix which is obtained by extracting the part corresponding to 3861 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3862 first row that belongs to the processor, r2 is the last row belonging to 3863 this processor, and c1-c2 is the range of indices of the local part of a 3864 vector suitable for applying the matrix to. This is an mxn matrix. In the 3865 common case of a square matrix, the row and column ranges are the same and 3866 the DIAGONAL part is also square. The remaining portion of the local 3867 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3868 3869 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3870 3871 You can call MatGetInfo() to get information on how effective the preallocation was; 3872 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3873 You can also run with the option -info and look for messages with the string 3874 malloc in them to see if additional memory allocation was needed. 3875 3876 Example usage: 3877 3878 Consider the following 8x8 matrix with 34 non-zero values, that is 3879 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3880 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3881 as follows: 3882 3883 .vb 3884 1 2 0 | 0 3 0 | 0 4 3885 Proc0 0 5 6 | 7 0 0 | 8 0 3886 9 0 10 | 11 0 0 | 12 0 3887 ------------------------------------- 3888 13 0 14 | 15 16 17 | 0 0 3889 Proc1 0 18 0 | 19 20 21 | 0 0 3890 0 0 0 | 22 23 0 | 24 0 3891 ------------------------------------- 3892 Proc2 25 26 27 | 0 0 28 | 29 0 3893 30 0 0 | 31 32 33 | 0 34 3894 .ve 3895 3896 This can be represented as a collection of submatrices as: 3897 3898 .vb 3899 A B C 3900 D E F 3901 G H I 3902 .ve 3903 3904 Where the submatrices A,B,C are owned by proc0, D,E,F are 3905 owned by proc1, G,H,I are owned by proc2. 3906 3907 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3908 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3909 The 'M','N' parameters are 8,8, and have the same values on all procs. 3910 3911 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3912 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3913 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3914 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3915 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ 3916 matrix and [DF] as another SeqAIJ matrix. 3917 3918 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3919 allocated for every row of the local diagonal submatrix, and o_nz 3920 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3921 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3922 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3923 In this case, the values of d_nz,o_nz are: 3924 .vb 3925 proc0 : d_nz = 2, o_nz = 2 3926 proc1 : d_nz = 3, o_nz = 2 3927 proc2 : d_nz = 1, o_nz = 4 3928 .ve 3929 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3930 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3931 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3932 34 values. 3933 3934 When the d_nnz, o_nnz parameters are specified, the storage is specified 3935 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3936 In the above case the values for d_nnz,o_nnz are: 3937 .vb 3938 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3939 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3940 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3941 .ve 3942 Here the space allocated is the sum of all the above values, i.e., 34, and 3943 hence the preallocation is perfect. 3944 3945 Level: intermediate 3946 3947 .keywords: matrix, aij, compressed row, sparse, parallel 3948 3949 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3950 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3951 @*/ 3952 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3953 { 3954 PetscErrorCode ierr; 3955 3956 PetscFunctionBegin; 3957 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3958 PetscValidType(B,1); 3959 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3960 PetscFunctionReturn(0); 3961 } 3962 3963 #undef __FUNCT__ 3964 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3965 /*@ 3966 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local 3967 rows in standard CSR format. 3968 3969 Collective on MPI_Comm 3970 3971 Input Parameters: 3972 + comm - MPI communicator 3973 . m - number of local rows (Cannot be PETSC_DECIDE) 3974 . n - This value should be the same as the local size used in creating the 3975 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3976 calculated if N is given) For square matrices n is almost always m. 3977 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3978 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3979 . i - row indices 3980 . j - column indices 3981 - a - matrix values 3982 3983 Output Parameter: 3984 . mat - the matrix 3985 3986 Level: intermediate 3987 3988 Notes: 3989 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3990 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3991 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3992 3993 The i and j indices are 0 based, and the i indices are offsets into the local j array.
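   A rough calling sketch (hypothetical variable names; each process passes the CSR arrays i, j, a
   describing only its own m local rows, and the arrays may be freed afterwards since they are copied):
.vb
   Mat A;
   ierr = MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve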
3994 3995 The format which is used for the sparse matrix input is equivalent to a 3996 row-major ordering, i.e., for the following matrix, the input data expected is 3997 as shown: 3998 3999 1 0 0 4000 2 0 3 P0 4001 ------- 4002 4 5 6 P1 4003 4004 Process0 [P0]: rows_owned=[0,1] 4005 i = {0,1,3} [size = nrow+1 = 2+1] 4006 j = {0,0,2} [size = nz = 3] 4007 v = {1,2,3} [size = nz = 3] 4008 4009 Process1 [P1]: rows_owned=[2] 4010 i = {0,3} [size = nrow+1 = 1+1] 4011 j = {0,1,2} [size = nz = 3] 4012 v = {4,5,6} [size = nz = 3] 4013 4014 .keywords: matrix, aij, compressed row, sparse, parallel 4015 4016 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4017 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4018 @*/ 4019 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4020 { 4021 PetscErrorCode ierr; 4022 4023 PetscFunctionBegin; 4024 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4025 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4026 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4027 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4028 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4029 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4030 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4031 PetscFunctionReturn(0); 4032 } 4033 4034 #undef __FUNCT__ 4035 #define __FUNCT__ "MatCreateAIJ" 4036 /*@C 4037 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4038 (the default parallel PETSc format). For good matrix assembly performance 4039 the user should preallocate the matrix storage by setting the parameters 4040 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4041 performance can be increased by more than a factor of 50. 4042 4043 Collective on MPI_Comm 4044 4045 Input Parameters: 4046 + comm - MPI communicator 4047 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4048 This value should be the same as the local size used in creating the 4049 y vector for the matrix-vector product y = Ax. 4050 . n - This value should be the same as the local size used in creating the 4051 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4052 calculated if N is given) For square matrices n is almost always m. 4053 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4054 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4055 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4056 (same value is used for all local rows) 4057 . d_nnz - array containing the number of nonzeros in the various rows of the 4058 DIAGONAL portion of the local submatrix (possibly different for each row) 4059 or NULL, if d_nz is used to specify the nonzero structure. 4060 The size of this array is equal to the number of local rows, i.e. 'm'. 4061 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4062 submatrix (same value is used for all local rows). 4063 - o_nnz - array containing the number of nonzeros in the various rows of the 4064 OFF-DIAGONAL portion of the local submatrix (possibly different for 4065 each row) or NULL, if o_nz is used to specify the nonzero 4066 structure.
The size of this array is equal to the number 4067 of local rows, i.e. 'm'. 4068 4069 Output Parameter: 4070 . A - the matrix 4071 4072 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4073 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4074 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4075 4076 Notes: 4077 If the *_nnz parameter is given then the *_nz parameter is ignored. 4078 4079 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4080 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4081 storage requirements for this matrix. 4082 4083 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4084 processor then it must be used on all processors that share the object for 4085 that argument. 4086 4087 The user MUST specify either the local or global matrix dimensions 4088 (possibly both). 4089 4090 The parallel matrix is partitioned across processors such that the 4091 first m0 rows belong to process 0, the next m1 rows belong to 4092 process 1, the next m2 rows belong to process 2, etc., where 4093 m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores 4094 values corresponding to an [m x N] submatrix. 4095 4096 The columns are logically partitioned with the n0 columns belonging 4097 to the 0th partition, the next n1 columns belonging to the next 4098 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4099 4100 The DIAGONAL portion of the local submatrix on any given processor 4101 is the submatrix corresponding to the rows and columns m,n 4102 corresponding to the given processor, i.e., the diagonal matrix on 4103 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4104 etc. The remaining portion of the local submatrix [m x (N-n)] 4105 constitutes the OFF-DIAGONAL portion. The example below better 4106 illustrates this concept. 4107 4108 For a square global matrix we define each processor's diagonal portion 4109 to be its local rows and the corresponding columns (a square submatrix); 4110 each processor's off-diagonal portion encompasses the remainder of the 4111 local matrix (a rectangular submatrix). 4112 4113 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4114 4115 When calling this routine with a single process communicator, a matrix of 4116 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4117 type of communicator, use the construction mechanism: 4118 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4119 4120 By default, this format uses inodes (identical nodes) when possible. 4121 We search for consecutive rows with the same nonzero structure, thereby 4122 reusing matrix information to achieve increased efficiency. 4123 4124 Options Database Keys: 4125 + -mat_no_inode - Do not use inodes 4126 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4127 - -mat_aij_oneindex - Internally use indexing starting at 1 4128 rather than 0. Note that when calling MatSetValues(), 4129 the user still MUST index entries starting at 0! 4130 4131 4132 Example usage: 4133 4134 Consider the following 8x8 matrix with 34 non-zero values, that is 4135 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4136 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 4137 as follows: 4138 4139 .vb 4140 1 2 0 | 0 3 0 | 0 4 4141 Proc0 0 5 6 | 7 0 0 | 8 0 4142 9 0 10 | 11 0 0 | 12 0 4143 ------------------------------------- 4144 13 0 14 | 15 16 17 | 0 0 4145 Proc1 0 18 0 | 19 20 21 | 0 0 4146 0 0 0 | 22 23 0 | 24 0 4147 ------------------------------------- 4148 Proc2 25 26 27 | 0 0 28 | 29 0 4149 30 0 0 | 31 32 33 | 0 34 4150 .ve 4151 4152 This can be represented as a collection of submatrices as: 4153 4154 .vb 4155 A B C 4156 D E F 4157 G H I 4158 .ve 4159 4160 Where the submatrices A,B,C are owned by proc0, D,E,F are 4161 owned by proc1, G,H,I are owned by proc2. 4162 4163 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4164 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4165 The 'M','N' parameters are 8,8, and have the same values on all procs. 4166 4167 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4168 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4169 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4170 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4171 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ 4172 matrix and [DF] as another SeqAIJ matrix. 4173 4174 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4175 allocated for every row of the local diagonal submatrix, and o_nz 4176 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4177 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4178 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4179 In this case, the values of d_nz,o_nz are: 4180 .vb 4181 proc0 : d_nz = 2, o_nz = 2 4182 proc1 : d_nz = 3, o_nz = 2 4183 proc2 : d_nz = 1, o_nz = 4 4184 .ve 4185 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4186 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4187 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4188 34 values. 4189 4190 When the d_nnz, o_nnz parameters are specified, the storage is specified 4191 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4192 In the above case the values for d_nnz,o_nnz are: 4193 .vb 4194 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4195 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4196 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4197 .ve 4198 Here the space allocated is the sum of all the above values, i.e., 34, and 4199 hence the preallocation is perfect.
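   As a rough sketch of a direct call, using proc1's values from the example above (m=3, n=3, M=N=8;
   error handling abridged; the d_nz/o_nz arguments are ignored here because the nnz arrays are given):
.vb
   PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
   Mat      A;
   ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
   /* ... insert values with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve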
4200 4201 Level: intermediate 4202 4203 .keywords: matrix, aij, compressed row, sparse, parallel 4204 4205 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4206 MPIAIJ, MatCreateMPIAIJWithArrays() 4207 @*/ 4208 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4209 { 4210 PetscErrorCode ierr; 4211 PetscMPIInt size; 4212 4213 PetscFunctionBegin; 4214 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4215 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4216 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4217 if (size > 1) { 4218 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4219 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4220 } else { 4221 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4222 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4223 } 4224 PetscFunctionReturn(0); 4225 } 4226 4227 #undef __FUNCT__ 4228 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4229 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4230 { 4231 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4232 4233 PetscFunctionBegin; 4234 if (Ad) *Ad = a->A; 4235 if (Ao) *Ao = a->B; 4236 if (colmap) *colmap = a->garray; 4237 PetscFunctionReturn(0); 4238 } 4239 4240 #undef __FUNCT__ 4241 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4242 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4243 { 4244 PetscErrorCode ierr; 4245 PetscInt i; 4246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4247 4248 PetscFunctionBegin; 4249 if (coloring->ctype == IS_COLORING_GLOBAL) { 4250 ISColoringValue *allcolors,*colors; 4251 ISColoring ocoloring; 4252 4253 /* set coloring for diagonal portion */ 4254 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4255 4256 /* set coloring for off-diagonal portion */ 4257 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4258 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4259 for (i=0; i<a->B->cmap->n; i++) { 4260 colors[i] = allcolors[a->garray[i]]; 4261 } 4262 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4263 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4264 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4265 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4266 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4267 ISColoringValue *colors; 4268 PetscInt *larray; 4269 ISColoring ocoloring; 4270 4271 /* set coloring for diagonal portion */ 4272 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4273 for (i=0; i<a->A->cmap->n; i++) { 4274 larray[i] = i + A->cmap->rstart; 4275 } 4276 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4277 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4278 for (i=0; i<a->A->cmap->n; i++) { 4279 colors[i] = coloring->colors[larray[i]]; 4280 } 4281 ierr = PetscFree(larray);CHKERRQ(ierr); 4282 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4283 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4284 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4285 4286 /* set coloring for off-diagonal portion */ 4287 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4288 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4289 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4290 for (i=0; i<a->B->cmap->n; i++) { 4291 colors[i] = coloring->colors[larray[i]]; 4292 } 4293 ierr = PetscFree(larray);CHKERRQ(ierr); 4294 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4295 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4296 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4297 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4298 PetscFunctionReturn(0); 4299 } 4300 4301 #undef __FUNCT__ 4302 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4303 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4304 { 4305 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4306 PetscErrorCode ierr; 4307 4308 PetscFunctionBegin; 4309 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4310 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4311 PetscFunctionReturn(0); 4312 } 4313 4314 #undef __FUNCT__ 4315 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4316 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4317 { 4318 PetscErrorCode ierr; 4319 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4320 PetscInt *indx; 4321 4322 PetscFunctionBegin; 4323 /* This routine will ONLY return MPIAIJ type matrix */ 4324 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4325 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4326 if (n == PETSC_DECIDE) { 4327 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4328 } 4329 /* Check sum(n) = N */ 4330 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4331 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4332 4333 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4334 rstart -= m; 4335 4336 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4337 for (i=0; i<m; i++) { 4338 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4339 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4340 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4341 } 4342 4343 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4344 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4345 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4346 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4347 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4348 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4349 PetscFunctionReturn(0); 4350 } 4351 4352 #undef __FUNCT__ 4353 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4354 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4355 { 4356 PetscErrorCode ierr; 4357 PetscInt m,N,i,rstart,nnz,Ii; 4358 PetscInt *indx; 4359 PetscScalar *values; 4360 4361 PetscFunctionBegin; 4362 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4363 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4364 for (i=0; i<m; i++) { 4365 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4366 Ii = i + rstart; 4367 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4368 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4369 } 4370 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4371 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4372 PetscFunctionReturn(0); 4373 } 4374 4375 #undef __FUNCT__ 4376 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4377 /*@ 4378 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4379 matrices from each processor 4380 4381 Collective on MPI_Comm 4382 4383 Input Parameters: 4384 + comm - the communicators the parallel matrix will live on 4385 . inmat - the input sequential matrices 4386 . n - number of local columns (or PETSC_DECIDE) 4387 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4388 4389 Output Parameter: 4390 . outmat - the parallel matrix generated 4391 4392 Level: advanced 4393 4394 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4395 4396 @*/ 4397 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4398 { 4399 PetscErrorCode ierr; 4400 PetscMPIInt size; 4401 4402 PetscFunctionBegin; 4403 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4404 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4405 if (size == 1) { 4406 if (scall == MAT_INITIAL_MATRIX) { 4407 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4408 } else { 4409 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4410 } 4411 } else { 4412 if (scall == MAT_INITIAL_MATRIX) { 4413 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4414 } 4415 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4416 } 4417 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4418 PetscFunctionReturn(0); 4419 } 4420 4421 #undef __FUNCT__ 4422 #define __FUNCT__ "MatFileSplit" 4423 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4424 { 4425 PetscErrorCode ierr; 4426 PetscMPIInt rank; 4427 PetscInt m,N,i,rstart,nnz; 4428 size_t len; 4429 const PetscInt *indx; 4430 PetscViewer out; 4431 char *name; 4432 Mat B; 4433 const PetscScalar *values; 4434 4435 PetscFunctionBegin; 4436 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4437 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4438 /* Should this be the type of the diagonal block of A? 
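For now MATSEQAIJ is hardwired below, so every per-process file is written in plain AIJ format regardless of any subtype the diagonal block of A may use.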
*/ 4439 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4440 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4441 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4442 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4443 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4444 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4445 for (i=0; i<m; i++) { 4446 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4447 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4448 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4449 } 4450 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4451 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4452 4453 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4454 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4455 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4456 sprintf(name,"%s.%d",outfile,rank); 4457 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4458 ierr = PetscFree(name);CHKERRQ(ierr); 4459 ierr = MatView(B,out);CHKERRQ(ierr); 4460 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4461 ierr = MatDestroy(&B);CHKERRQ(ierr); 4462 PetscFunctionReturn(0); 4463 } 4464 4465 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4466 #undef __FUNCT__ 4467 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4468 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4469 { 4470 PetscErrorCode ierr; 4471 Mat_Merge_SeqsToMPI *merge; 4472 PetscContainer container; 4473 4474 PetscFunctionBegin; 4475 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4476 if (container) { 4477 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4478 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4479 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4480 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4481 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4482 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4483 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4484 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4485 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4486 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4487 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4488 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4489 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4490 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4491 ierr = PetscFree(merge);CHKERRQ(ierr); 4492 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4493 } 4494 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4495 PetscFunctionReturn(0); 4496 } 4497 4498 #include <../src/mat/utils/freespace.h> 4499 #include <petscbt.h> 4500 4501 #undef __FUNCT__ 4502 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4503 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4504 { 4505 PetscErrorCode ierr; 4506 MPI_Comm comm; 4507 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4508 PetscMPIInt size,rank,taga,*len_s; 4509 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4510 PetscInt proc,m; 4511 PetscInt **buf_ri,**buf_rj; 4512 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4513 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4514 MPI_Request *s_waits,*r_waits; 4515 MPI_Status *status; 4516 MatScalar *aa=a->a; 4517 MatScalar **abuf_r,*ba_i; 4518 Mat_Merge_SeqsToMPI *merge; 4519 PetscContainer container; 4520 4521 PetscFunctionBegin; 4522 ierr = 
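/* Numeric phase of the sequential-to-MPI merge: the nonzero pattern and message layout were computed by MatCreateMPIAIJSumSeqAIJSymbolic() and are fetched from the attached "MatMergeSeqsToMPI" container below, so only the numerical values need to be communicated and summed here. */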
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4523 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4524 4525 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4526 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4527 4528 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4529 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4530 4531 bi = merge->bi; 4532 bj = merge->bj; 4533 buf_ri = merge->buf_ri; 4534 buf_rj = merge->buf_rj; 4535 4536 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4537 owners = merge->rowmap->range; 4538 len_s = merge->len_s; 4539 4540 /* send and recv matrix values */ 4541 /*-----------------------------*/ 4542 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4543 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4544 4545 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4546 for (proc=0,k=0; proc<size; proc++) { 4547 if (!len_s[proc]) continue; 4548 i = owners[proc]; 4549 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4550 k++; 4551 } 4552 4553 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4554 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4555 ierr = PetscFree(status);CHKERRQ(ierr); 4556 4557 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4558 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4559 4560 /* insert mat values of mpimat */ 4561 /*----------------------------*/ 4562 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4563 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4564 4565 for (k=0; k<merge->nrecv; k++) { 4566 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4567 nrows = *(buf_ri_k[k]); 4568 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4569 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4570 } 4571 4572 /* set values of ba */ 4573 m = merge->rowmap->n; 4574 for (i=0; i<m; i++) { 4575 arow = owners[rank] + i; 4576 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4577 bnzi = bi[i+1] - bi[i]; 4578 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4579 4580 /* add local non-zero vals of this proc's seqmat into ba */ 4581 anzi = ai[arow+1] - ai[arow]; 4582 aj = a->j + ai[arow]; 4583 aa = a->a + ai[arow]; 4584 nextaj = 0; 4585 for (j=0; nextaj<anzi; j++) { 4586 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4587 ba_i[j] += aa[nextaj++]; 4588 } 4589 } 4590 4591 /* add received vals into ba */ 4592 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4593 /* i-th row */ 4594 if (i == *nextrow[k]) { 4595 anzi = *(nextai[k]+1) - *nextai[k]; 4596 aj = buf_rj[k] + *(nextai[k]); 4597 aa = abuf_r[k] + *(nextai[k]); 4598 nextaj = 0; 4599 for (j=0; nextaj<anzi; j++) { 4600 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4601 ba_i[j] += aa[nextaj++]; 4602 } 4603 } 4604 nextrow[k]++; nextai[k]++; 4605 } 4606 } 4607 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4608 } 4609 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4610 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4611 4612 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4613 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4614 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4615 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4616 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4617 PetscFunctionReturn(0); 4618 } 4619 4620 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4621 4622 #undef __FUNCT__ 4623 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4624 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4625 { 4626 PetscErrorCode ierr; 4627 Mat B_mpi; 4628 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4629 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4630 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4631 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4632 PetscInt len,proc,*dnz,*onz,bs,cbs; 4633 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4634 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4635 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4636 MPI_Status *status; 4637 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4638 PetscBT lnkbt; 4639 Mat_Merge_SeqsToMPI *merge; 4640 PetscContainer container; 4641 4642 PetscFunctionBegin; 4643 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4644 4645 /* make sure it is a PETSc comm */ 4646 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4647 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4648 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4649 4650 ierr = PetscNew(&merge);CHKERRQ(ierr); 4651 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4652 4653 /* determine row ownership */ 4654 /*---------------------------------------------------------*/ 4655 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4656 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4657 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4658 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4659 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4660 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4661 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4662 4663 m = merge->rowmap->n; 4664 owners = merge->rowmap->range; 4665 4666 /* determine the number of messages to send, their lengths */ 4667 /*---------------------------------------------------------*/ 4668 len_s = merge->len_s; 4669 4670 len = 0; /* length of buf_si[] */ 4671 merge->nsend = 0; 4672 for (proc=0; proc<size; proc++) { 4673 len_si[proc] = 0; 4674 if (proc == rank) { 4675 len_s[proc] = 0; 4676 } else { 4677 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4678 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4679 } 4680 if (len_s[proc]) { 4681 merge->nsend++; 4682 nrows = 0; 4683 for (i=owners[proc]; i<owners[proc+1]; i++) { 4684 if (ai[i+1] > ai[i]) nrows++; 4685 } 4686 len_si[proc] = 2*(nrows+1); 4687 len += len_si[proc]; 4688 } 4689 } 4690 4691 /* determine the number and length of messages to receive for ij-structure */ 4692 /*-------------------------------------------------------------------------*/ 4693 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4694 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4695 4696 /* post the Irecv of j-structure */ 4697 /*-------------------------------*/ 4698 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4699 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4700 4701 /* post the Isend of j-structure */ 4702 /*--------------------------------*/ 4703 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4704 4705 for (proc=0, k=0; proc<size; proc++) { 4706 if (!len_s[proc]) continue; 4707 i = owners[proc]; 4708 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4709 k++; 4710 } 4711 4712 /* receives and sends of j-structure are complete */ 4713 /*------------------------------------------------*/ 4714 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4715 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4716 4717 /* send and recv i-structure */ 4718 /*---------------------------*/ 4719 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4720 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4721 4722 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4723 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4724 for (proc=0,k=0; proc<size; proc++) { 4725 if (!len_s[proc]) continue; 4726 /* form outgoing message for i-structure: 4727 buf_si[0]: nrows to be sent 4728 [1:nrows]: row index (global) 4729 [nrows+1:2*nrows+1]: i-structure index 4730 */ 4731 /*-------------------------------------------*/ 4732 nrows = len_si[proc]/2 - 1; 4733 buf_si_i = buf_si + nrows+1; 4734 buf_si[0] = nrows; 4735 buf_si_i[0] = 0; 4736 nrows = 0; 4737 for (i=owners[proc]; i<owners[proc+1]; i++) { 4738 anzi = ai[i+1] - ai[i]; 4739 if (anzi) { 4740 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4741 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4742 nrows++; 4743 } 4744 } 4745 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4746 k++; 4747 buf_si += len_si[proc]; 4748 } 4749 4750 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4751 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4752 4753 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4754 for (i=0; i<merge->nrecv; i++) { 4755 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4756 } 4757 4758 ierr = PetscFree(len_si);CHKERRQ(ierr); 4759 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4760 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4761 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4762 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4763 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4764 ierr = PetscFree(status);CHKERRQ(ierr); 4765 4766 /* compute a local seq matrix in each processor */ 4767 /*----------------------------------------------*/ 4768 /* allocate bi array and free space for accumulating nonzero column info */ 4769 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4770 bi[0] = 0; 4771 4772 /* create and initialize a linked list */ 4773 nlnk = N+1; 4774 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4775 4776 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4777 len = ai[owners[rank+1]] - ai[owners[rank]]; 4778 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4779 4780 current_space = free_space; 4781 4782 /* determine symbolic info for each local row */ 4783 ierr = 
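/* buf_ri_k[], nextrow[], and nextai[] (allocated next) walk each received i-structure in step with the local row loop, so the column lists from every contributing process can be merged into the sorted linked list one row at a time. */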
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4784 4785 for (k=0; k<merge->nrecv; k++) { 4786 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4787 nrows = *buf_ri_k[k]; 4788 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4789 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4790 } 4791 4792 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4793 len = 0; 4794 for (i=0; i<m; i++) { 4795 bnzi = 0; 4796 /* add local non-zero cols of this proc's seqmat into lnk */ 4797 arow = owners[rank] + i; 4798 anzi = ai[arow+1] - ai[arow]; 4799 aj = a->j + ai[arow]; 4800 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4801 bnzi += nlnk; 4802 /* add received col data into lnk */ 4803 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4804 if (i == *nextrow[k]) { /* i-th row */ 4805 anzi = *(nextai[k]+1) - *nextai[k]; 4806 aj = buf_rj[k] + *nextai[k]; 4807 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4808 bnzi += nlnk; 4809 nextrow[k]++; nextai[k]++; 4810 } 4811 } 4812 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4813 4814 /* if free space is not available, make more free space */ 4815 if (current_space->local_remaining<bnzi) { 4816 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4817 nspacedouble++; 4818 } 4819 /* copy data into free space, then initialize lnk */ 4820 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4821 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4822 4823 current_space->array += bnzi; 4824 current_space->local_used += bnzi; 4825 current_space->local_remaining -= bnzi; 4826 4827 bi[i+1] = bi[i] + bnzi; 4828 } 4829 4830 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4831 4832 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4833 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4834 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4835 4836 /* create symbolic parallel matrix B_mpi */ 4837 /*---------------------------------------*/ 4838 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4839 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4840 if (n==PETSC_DECIDE) { 4841 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4842 } else { 4843 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4844 } 4845 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4846 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4847 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4848 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4849 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4850 4851 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4852 B_mpi->assembled = PETSC_FALSE; 4853 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4854 merge->bi = bi; 4855 merge->bj = bj; 4856 merge->buf_ri = buf_ri; 4857 merge->buf_rj = buf_rj; 4858 merge->coi = NULL; 4859 merge->coj = NULL; 4860 merge->owners_co = NULL; 4861 4862 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4863 4864 /* attach the supporting struct to B_mpi for reuse */ 4865 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4866 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4867 ierr = 
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4868 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4869 *mpimat = B_mpi; 4870 4871 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4872 PetscFunctionReturn(0); 4873 } 4874 4875 #undef __FUNCT__ 4876 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4877 /*@C 4878 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential 4879 matrices from each processor 4880 4881 Collective on MPI_Comm 4882 4883 Input Parameters: 4884 + comm - the communicator the parallel matrix will live on 4885 . seqmat - the input sequential matrix 4886 . m - number of local rows (or PETSC_DECIDE) 4887 . n - number of local columns (or PETSC_DECIDE) 4888 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4889 4890 Output Parameter: 4891 . mpimat - the parallel matrix generated 4892 4893 Level: advanced 4894 4895 Notes: 4896 The dimensions of the sequential matrix in each processor MUST be the same. 4897 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4898 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4899 @*/ 4900 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4901 { 4902 PetscErrorCode ierr; 4903 PetscMPIInt size; 4904 4905 PetscFunctionBegin; 4906 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4907 if (size == 1) { 4908 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4909 if (scall == MAT_INITIAL_MATRIX) { 4910 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4911 } else { 4912 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4913 } 4914 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4915 PetscFunctionReturn(0); 4916 } 4917 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4918 if (scall == MAT_INITIAL_MATRIX) { 4919 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4920 } 4921 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4922 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4923 PetscFunctionReturn(0); 4924 } 4925 4926 #undef __FUNCT__ 4927 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4928 /*@ 4929 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4930 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4931 with MatGetSize() 4932 4933 Not Collective 4934 4935 Input Parameters: 4936 + A - the matrix 4937 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4938 4939 Output Parameter: 4940 . 
A_loc - the local sequential matrix generated 4941 4942 Level: developer 4943 4944 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4945 4946 @*/ 4947 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4948 { 4949 PetscErrorCode ierr; 4950 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4951 Mat_SeqAIJ *mat,*a,*b; 4952 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4953 MatScalar *aa,*ba,*cam; 4954 PetscScalar *ca; 4955 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4956 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4957 PetscBool match; 4958 MPI_Comm comm; 4959 PetscMPIInt size; 4960 4961 PetscFunctionBegin; 4962 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4963 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4964 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4965 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4966 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4967 4968 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4969 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4970 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4971 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4972 aa = a->a; ba = b->a; 4973 if (scall == MAT_INITIAL_MATRIX) { 4974 if (size == 1) { 4975 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4976 PetscFunctionReturn(0); 4977 } 4978 4979 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4980 ci[0] = 0; 4981 for (i=0; i<am; i++) { 4982 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4983 } 4984 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4985 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4986 k = 0; 4987 for (i=0; i<am; i++) { 4988 ncols_o = bi[i+1] - bi[i]; 4989 ncols_d = ai[i+1] - ai[i]; 4990 /* off-diagonal portion of A */ 4991 for (jo=0; jo<ncols_o; jo++) { 4992 col = cmap[*bj]; 4993 if (col >= cstart) break; 4994 cj[k] = col; bj++; 4995 ca[k++] = *ba++; 4996 } 4997 /* diagonal portion of A */ 4998 for (j=0; j<ncols_d; j++) { 4999 cj[k] = cstart + *aj++; 5000 ca[k++] = *aa++; 5001 } 5002 /* off-diagonal portion of A */ 5003 for (j=jo; j<ncols_o; j++) { 5004 cj[k] = cmap[*bj++]; 5005 ca[k++] = *ba++; 5006 } 5007 } 5008 /* put together the new matrix */ 5009 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5010 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5011 /* Since these are PETSc arrays, change flags to free them as necessary. 
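Here the merged ci, cj, and ca buffers were obtained with PetscMalloc1() above, so the SeqAIJ destructor must release them when A_loc is destroyed; setting free_a and free_ij below requests exactly that.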
*/ 5012 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5013 mat->free_a = PETSC_TRUE; 5014 mat->free_ij = PETSC_TRUE; 5015 mat->nonew = 0; 5016 } else if (scall == MAT_REUSE_MATRIX) { 5017 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5018 ci = mat->i; cj = mat->j; cam = mat->a; 5019 for (i=0; i<am; i++) { 5020 /* off-diagonal portion of A */ 5021 ncols_o = bi[i+1] - bi[i]; 5022 for (jo=0; jo<ncols_o; jo++) { 5023 col = cmap[*bj]; 5024 if (col >= cstart) break; 5025 *cam++ = *ba++; bj++; 5026 } 5027 /* diagonal portion of A */ 5028 ncols_d = ai[i+1] - ai[i]; 5029 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5030 /* off-diagonal portion of A */ 5031 for (j=jo; j<ncols_o; j++) { 5032 *cam++ = *ba++; bj++; 5033 } 5034 } 5035 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5036 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5037 PetscFunctionReturn(0); 5038 } 5039 5040 #undef __FUNCT__ 5041 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5042 /*@C 5043 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5044 5045 Not Collective 5046 5047 Input Parameters: 5048 + A - the matrix 5049 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5050 - row, col - index sets of rows and columns to extract (or NULL) 5051 5052 Output Parameter: 5053 . A_loc - the local sequential matrix generated 5054 5055 Level: developer 5056 5057 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5058 5059 @*/ 5060 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5061 { 5062 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5063 PetscErrorCode ierr; 5064 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5065 IS isrowa,iscola; 5066 Mat *aloc; 5067 PetscBool match; 5068 5069 PetscFunctionBegin; 5070 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5071 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5072 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5073 if (!row) { 5074 start = A->rmap->rstart; end = A->rmap->rend; 5075 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5076 } else { 5077 isrowa = *row; 5078 } 5079 if (!col) { 5080 start = A->cmap->rstart; 5081 cmap = a->garray; 5082 nzA = a->A->cmap->n; 5083 nzB = a->B->cmap->n; 5084 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5085 ncols = 0; 5086 for (i=0; i<nzB; i++) { 5087 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5088 else break; 5089 } 5090 imark = i; 5091 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5092 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5093 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5094 } else { 5095 iscola = *col; 5096 } 5097 if (scall != MAT_INITIAL_MATRIX) { 5098 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5099 aloc[0] = *A_loc; 5100 } 5101 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5102 *A_loc = aloc[0]; 5103 ierr = PetscFree(aloc);CHKERRQ(ierr); 5104 if (!row) { 5105 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5106 } 5107 if (!col) { 5108 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5109 } 5110 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5111 PetscFunctionReturn(0); 5112 } 5113 5114 #undef __FUNCT__ 5115 #define __FUNCT__ "MatGetBrowsOfAcols" 5116 /*@C 5117 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to 
nonzero columns of local A 5118 5119 Collective on Mat 5120 5121 Input Parameters: 5122 + A,B - the matrices in mpiaij format 5123 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5124 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5125 5126 Output Parameter: 5127 + rowb, colb - index sets of rows and columns of B to extract 5128 - B_seq - the sequential matrix generated 5129 5130 Level: developer 5131 5132 @*/ 5133 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5134 { 5135 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5136 PetscErrorCode ierr; 5137 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5138 IS isrowb,iscolb; 5139 Mat *bseq=NULL; 5140 5141 PetscFunctionBegin; 5142 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5143 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5144 } 5145 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5146 5147 if (scall == MAT_INITIAL_MATRIX) { 5148 start = A->cmap->rstart; 5149 cmap = a->garray; 5150 nzA = a->A->cmap->n; 5151 nzB = a->B->cmap->n; 5152 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5153 ncols = 0; 5154 for (i=0; i<nzB; i++) { /* row < local row index */ 5155 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5156 else break; 5157 } 5158 imark = i; 5159 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5160 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5161 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5162 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5163 } else { 5164 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5165 isrowb = *rowb; iscolb = *colb; 5166 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5167 bseq[0] = *B_seq; 5168 } 5169 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5170 *B_seq = bseq[0]; 5171 ierr = PetscFree(bseq);CHKERRQ(ierr); 5172 if (!rowb) { 5173 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5174 } else { 5175 *rowb = isrowb; 5176 } 5177 if (!colb) { 5178 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5179 } else { 5180 *colb = iscolb; 5181 } 5182 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5183 PetscFunctionReturn(0); 5184 } 5185 5186 #undef __FUNCT__ 5187 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5188 /* 5189 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5190 of the OFF-DIAGONAL portion of local A 5191 5192 Collective on Mat 5193 5194 Input Parameters: 5195 + A,B - the matrices in mpiaij format 5196 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5197 5198 Output Parameter: 5199 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5200 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5201 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5202 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5203 5204 Level: developer 5205 5206 */ 5207 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5208 { 5209 VecScatter_MPI_General *gen_to,*gen_from; 5210 PetscErrorCode ierr; 5211 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5212 Mat_SeqAIJ *b_oth; 5213 VecScatter ctx =a->Mvctx; 5214 MPI_Comm comm; 5215 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5216 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5217 PetscScalar *rvalues,*svalues; 5218 MatScalar *b_otha,*bufa,*bufA; 5219 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5220 MPI_Request *rwaits = NULL,*swaits = NULL; 5221 MPI_Status *sstatus,rstatus; 5222 PetscMPIInt jj,size; 5223 PetscInt *cols,sbs,rbs; 5224 PetscScalar *vals; 5225 5226 PetscFunctionBegin; 5227 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5228 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5229 if (size == 1) PetscFunctionReturn(0); 5230 5231 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5232 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5233 } 5234 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5235 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5236 5237 gen_to = (VecScatter_MPI_General*)ctx->todata; 5238 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5239 rvalues = gen_from->values; /* holds the length of receiving row */ 5240 svalues = gen_to->values; /* holds the length of sending row */ 5241 nrecvs = gen_from->n; 5242 nsends = gen_to->n; 5243 5244 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5245 srow = gen_to->indices; /* local row index to be sent */ 5246 sstarts = gen_to->starts; 5247 sprocs = gen_to->procs; 5248 sstatus = gen_to->sstatus; 5249 sbs = gen_to->bs; 5250 rstarts = gen_from->starts; 5251 rprocs = gen_from->procs; 5252 rbs = gen_from->bs; 5253 5254 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5255 if (scall == MAT_INITIAL_MATRIX) { 5256 /* i-array */ 5257 /*---------*/ 5258 /* post receives */ 5259 for (i=0; i<nrecvs; i++) { 5260 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5261 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5262 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5263 } 5264 5265 /* pack the outgoing message */ 5266 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5267 5268 sstartsj[0] = 0; 5269 rstartsj[0] = 0; 5270 len = 0; /* total length of j or a array to be sent */ 5271 k = 0; 5272 for (i=0; i<nsends; i++) { 5273 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5274 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5275 for (j=0; j<nrows; j++) { 5276 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5277 for (l=0; l<sbs; l++) { 5278 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5279 5280 rowlen[j*sbs+l] = ncols; 5281 5282 len += ncols; 5283 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5284 } 5285 k++; 5286 } 5287 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5288 5289 
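/* len is not reset per destination, so at this point it holds the running total of nonzeros packed for all destinations handled so far; recording it below makes sstartsj[] a cumulative offset into bufj and bufa. */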
sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5290 } 5291 /* recvs and sends of i-array are completed */ 5292 i = nrecvs; 5293 while (i--) { 5294 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5295 } 5296 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5297 5298 /* allocate buffers for sending j and a arrays */ 5299 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5300 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5301 5302 /* create i-array of B_oth */ 5303 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5304 5305 b_othi[0] = 0; 5306 len = 0; /* total length of j or a array to be received */ 5307 k = 0; 5308 for (i=0; i<nrecvs; i++) { 5309 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5310 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5311 for (j=0; j<nrows; j++) { 5312 b_othi[k+1] = b_othi[k] + rowlen[j]; 5313 len += rowlen[j]; k++; 5314 } 5315 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5316 } 5317 5318 /* allocate space for j and a arrrays of B_oth */ 5319 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5320 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5321 5322 /* j-array */ 5323 /*---------*/ 5324 /* post receives of j-array */ 5325 for (i=0; i<nrecvs; i++) { 5326 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5327 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5328 } 5329 5330 /* pack the outgoing message j-array */ 5331 k = 0; 5332 for (i=0; i<nsends; i++) { 5333 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5334 bufJ = bufj+sstartsj[i]; 5335 for (j=0; j<nrows; j++) { 5336 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5337 for (ll=0; ll<sbs; ll++) { 5338 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5339 for (l=0; l<ncols; l++) { 5340 *bufJ++ = cols[l]; 5341 } 5342 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5343 } 5344 } 5345 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5346 } 5347 5348 /* recvs and sends of j-array are completed */ 5349 i = nrecvs; 5350 while (i--) { 5351 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5352 } 5353 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5354 } else if (scall == MAT_REUSE_MATRIX) { 5355 sstartsj = *startsj_s; 5356 rstartsj = *startsj_r; 5357 bufa = *bufa_ptr; 5358 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5359 b_otha = b_oth->a; 5360 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5361 5362 /* a-array */ 5363 /*---------*/ 5364 /* post receives of a-array */ 5365 for (i=0; i<nrecvs; i++) { 5366 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5367 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5368 } 5369 5370 /* pack the outgoing message a-array */ 5371 k = 0; 5372 for (i=0; i<nsends; i++) { 5373 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5374 bufA = bufa+sstartsj[i]; 5375 for (j=0; j<nrows; j++) { 5376 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5377 for (ll=0; ll<sbs; ll++) { 5378 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5379 for (l=0; l<ncols; l++) { 5380 *bufA++ = vals[l]; 5381 } 5382 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5383 } 5384 } 5385 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5386 } 5387 /* recvs and sends of a-array are completed */ 5388 i = nrecvs; 5389 while (i--) { 5390 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5391 } 5392 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5393 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5394 5395 if (scall == MAT_INITIAL_MATRIX) { 5396 /* put together the new matrix */ 5397 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5398 5399 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5400 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5401 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5402 b_oth->free_a = PETSC_TRUE; 5403 b_oth->free_ij = PETSC_TRUE; 5404 b_oth->nonew = 0; 5405 5406 ierr = PetscFree(bufj);CHKERRQ(ierr); 5407 if (!startsj_s || !bufa_ptr) { 5408 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5409 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5410 } else { 5411 *startsj_s = sstartsj; 5412 *startsj_r = rstartsj; 5413 *bufa_ptr = bufa; 5414 } 5415 } 5416 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5417 PetscFunctionReturn(0); 5418 } 5419 5420 #undef __FUNCT__ 5421 #define __FUNCT__ "MatGetCommunicationStructs" 5422 /*@C 5423 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5424 5425 Not Collective 5426 5427 Input Parameters: 5428 . A - The matrix in mpiaij format 5429 5430 Output Parameter: 5431 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5432 . 
colmap - A map from global column index to local index into lvec 5433 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5434 5435 Level: developer 5436 5437 @*/ 5438 #if defined(PETSC_USE_CTABLE) 5439 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5440 #else 5441 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5442 #endif 5443 { 5444 Mat_MPIAIJ *a; 5445 5446 PetscFunctionBegin; 5447 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5448 PetscValidPointer(lvec, 2); 5449 PetscValidPointer(colmap, 3); 5450 PetscValidPointer(multScatter, 4); 5451 a = (Mat_MPIAIJ*) A->data; 5452 if (lvec) *lvec = a->lvec; 5453 if (colmap) *colmap = a->colmap; 5454 if (multScatter) *multScatter = a->Mvctx; 5455 PetscFunctionReturn(0); 5456 } 5457 5458 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5459 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5460 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5461 5462 #undef __FUNCT__ 5463 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5464 /* 5465 Computes (B'*A')' since computing B*A directly is untenable 5466 5467 n p p 5468 ( ) ( ) ( ) 5469 m ( A ) * n ( B ) = m ( C ) 5470 ( ) ( ) ( ) 5471 5472 */ 5473 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5474 { 5475 PetscErrorCode ierr; 5476 Mat At,Bt,Ct; 5477 5478 PetscFunctionBegin; 5479 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5480 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5481 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5482 ierr = MatDestroy(&At);CHKERRQ(ierr); 5483 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5484 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5485 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5486 PetscFunctionReturn(0); 5487 } 5488 5489 #undef __FUNCT__ 5490 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5491 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5492 { 5493 PetscErrorCode ierr; 5494 PetscInt m=A->rmap->n,n=B->cmap->n; 5495 Mat Cmat; 5496 5497 PetscFunctionBegin; 5498 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5499 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5500 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5501 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5502 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5503 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5504 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5505 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5506 5507 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5508 5509 *C = Cmat; 5510 PetscFunctionReturn(0); 5511 } 5512 5513 /* ----------------------------------------------------------------*/ 5514 #undef __FUNCT__ 5515 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5516 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5517 { 5518 PetscErrorCode ierr; 5519 5520 PetscFunctionBegin; 5521 if (scall == MAT_INITIAL_MATRIX) { 5522 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5523 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5524 ierr = 
PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5525 } 5526 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5527 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5528 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5529 PetscFunctionReturn(0); 5530 } 5531 5532 #if defined(PETSC_HAVE_MUMPS) 5533 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5534 #endif 5535 #if defined(PETSC_HAVE_PASTIX) 5536 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5537 #endif 5538 #if defined(PETSC_HAVE_SUPERLU_DIST) 5539 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5540 #endif 5541 #if defined(PETSC_HAVE_CLIQUE) 5542 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5543 #endif 5544 5545 /*MC 5546 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5547 5548 Options Database Keys: 5549 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5550 5551 Level: beginner 5552 5553 .seealso: MatCreateAIJ() 5554 M*/ 5555 5556 #undef __FUNCT__ 5557 #define __FUNCT__ "MatCreate_MPIAIJ" 5558 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5559 { 5560 Mat_MPIAIJ *b; 5561 PetscErrorCode ierr; 5562 PetscMPIInt size; 5563 5564 PetscFunctionBegin; 5565 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5566 5567 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5568 B->data = (void*)b; 5569 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5570 B->assembled = PETSC_FALSE; 5571 B->insertmode = NOT_SET_VALUES; 5572 b->size = size; 5573 5574 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5575 5576 /* build cache for off array entries formed */ 5577 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5578 5579 b->donotstash = PETSC_FALSE; 5580 b->colmap = 0; 5581 b->garray = 0; 5582 b->roworiented = PETSC_TRUE; 5583 5584 /* stuff used for matrix vector multiply */ 5585 b->lvec = NULL; 5586 b->Mvctx = NULL; 5587 5588 /* stuff for MatGetRow() */ 5589 b->rowindices = 0; 5590 b->rowvalues = 0; 5591 b->getrowactive = PETSC_FALSE; 5592 5593 /* flexible pointer used in CUSP/CUSPARSE classes */ 5594 b->spptr = NULL; 5595 5596 #if defined(PETSC_HAVE_MUMPS) 5597 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5598 #endif 5599 #if defined(PETSC_HAVE_PASTIX) 5600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5601 #endif 5602 #if defined(PETSC_HAVE_SUPERLU_DIST) 5603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5604 #endif 5605 #if defined(PETSC_HAVE_CLIQUE) 5606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5607 #endif 5608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5612 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5614 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5615 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5616 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5617 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5618 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5619 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5620 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5621 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5622 PetscFunctionReturn(0); 5623 } 5624 5625 #undef __FUNCT__ 5626 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5627 /*@C 5628 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5629 and "off-diagonal" part of the matrix in CSR format. 5630 5631 Collective on MPI_Comm 5632 5633 Input Parameters: 5634 + comm - MPI communicator 5635 . m - number of local rows (Cannot be PETSC_DECIDE) 5636 . n - This value should be the same as the local size used in creating the 5637 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5638 calculated if N is given) For square matrices n is almost always m. 5639 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5640 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5641 . i - row indices for "diagonal" portion of matrix 5642 . j - column indices 5643 . a - matrix values 5644 . oi - row indices for "off-diagonal" portion of matrix 5645 . oj - column indices 5646 - oa - matrix values 5647 5648 Output Parameter: 5649 . mat - the matrix 5650 5651 Level: advanced 5652 5653 Notes: 5654 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5655 must free the arrays once the matrix has been destroyed and not before. 5656 5657 The i and j indices are 0 based 5658 5659 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5660 5661 This sets local rows and cannot be used to set off-processor values. 5662 5663 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5664 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5665 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5666 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5667 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5668 communication if it is known that only local entries will be set. 
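   Example usage (a minimal sketch, not taken from the PETSc examples; the 2x2 values and the one-row-per-process layout
   are hypothetical). The diagonal block uses local column indices while the off-diagonal block uses global column
   indices, matching the MatCreateSeqAIJWithArrays() calls in the implementation below. On process 0 of two one might pass
.vb
      PetscInt    i[]  = {0,1}, j[]  = {0};
      PetscInt    oi[] = {0,1}, oj[] = {1};
      PetscScalar a[]  = {2.0}, oa[] = {-1.0};
      Mat         A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
   while process 1 passes oj[] = {0} and otherwise identical arrays; all six arrays must remain valid until A has been destroyed.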
5669 5670 .keywords: matrix, aij, compressed row, sparse, parallel 5671 5672 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5673 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5674 C@*/ 5675 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5676 { 5677 PetscErrorCode ierr; 5678 Mat_MPIAIJ *maij; 5679 5680 PetscFunctionBegin; 5681 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5682 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5683 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5684 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5685 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5686 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5687 maij = (Mat_MPIAIJ*) (*mat)->data; 5688 5689 (*mat)->preallocated = PETSC_TRUE; 5690 5691 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5692 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5693 5694 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5695 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5696 5697 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5698 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5699 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5700 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5701 5702 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5703 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5704 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5705 PetscFunctionReturn(0); 5706 } 5707 5708 /* 5709 Special version for direct calls from Fortran 5710 */ 5711 #include <petsc-private/fortranimpl.h> 5712 5713 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5714 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5715 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5716 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5717 #endif 5718 5719 /* Change these macros so can be used in void function */ 5720 #undef CHKERRQ 5721 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5722 #undef SETERRQ2 5723 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5724 #undef SETERRQ3 5725 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5726 #undef SETERRQ 5727 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5728 5729 #undef __FUNCT__ 5730 #define __FUNCT__ "matsetvaluesmpiaij_" 5731 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5732 { 5733 Mat mat = *mmat; 5734 PetscInt m = *mm, n = *mn; 5735 InsertMode addv = *maddv; 5736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5737 PetscScalar value; 5738 PetscErrorCode ierr; 5739 5740 MatCheckPreallocated(mat,1); 5741 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5742 5743 #if defined(PETSC_USE_DEBUG) 5744 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5745 #endif 5746 { 5747 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5748 
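/* rstart/rend (above) and cstart/cend (next) bound the rows and columns this process owns: entries whose row and column both fall inside go into the diagonal block A, owned rows with off-process columns go into the off-diagonal block B, and rows owned by other processes are stashed for communication at assembly time (unless donotstash is set). */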
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5749 PetscBool roworiented = aij->roworiented; 5750 5751 /* Some Variables required in the macro */ 5752 Mat A = aij->A; 5753 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5754 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5755 MatScalar *aa = a->a; 5756 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5757 Mat B = aij->B; 5758 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5759 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5760 MatScalar *ba = b->a; 5761 5762 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5763 PetscInt nonew = a->nonew; 5764 MatScalar *ap1,*ap2; 5765 5766 PetscFunctionBegin; 5767 for (i=0; i<m; i++) { 5768 if (im[i] < 0) continue; 5769 #if defined(PETSC_USE_DEBUG) 5770 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5771 #endif 5772 if (im[i] >= rstart && im[i] < rend) { 5773 row = im[i] - rstart; 5774 lastcol1 = -1; 5775 rp1 = aj + ai[row]; 5776 ap1 = aa + ai[row]; 5777 rmax1 = aimax[row]; 5778 nrow1 = ailen[row]; 5779 low1 = 0; 5780 high1 = nrow1; 5781 lastcol2 = -1; 5782 rp2 = bj + bi[row]; 5783 ap2 = ba + bi[row]; 5784 rmax2 = bimax[row]; 5785 nrow2 = bilen[row]; 5786 low2 = 0; 5787 high2 = nrow2; 5788 5789 for (j=0; j<n; j++) { 5790 if (roworiented) value = v[i*n+j]; 5791 else value = v[i+j*m]; 5792 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5793 if (in[j] >= cstart && in[j] < cend) { 5794 col = in[j] - cstart; 5795 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5796 } else if (in[j] < 0) continue; 5797 #if defined(PETSC_USE_DEBUG) 5798 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5799 #endif 5800 else { 5801 if (mat->was_assembled) { 5802 if (!aij->colmap) { 5803 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5804 } 5805 #if defined(PETSC_USE_CTABLE) 5806 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5807 col--; 5808 #else 5809 col = aij->colmap[in[j]] - 1; 5810 #endif 5811 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5812 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5813 col = in[j]; 5814 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5815 B = aij->B; 5816 b = (Mat_SeqAIJ*)B->data; 5817 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5818 rp2 = bj + bi[row]; 5819 ap2 = ba + bi[row]; 5820 rmax2 = bimax[row]; 5821 nrow2 = bilen[row]; 5822 low2 = 0; 5823 high2 = nrow2; 5824 bm = aij->B->rmap->n; 5825 ba = b->a; 5826 } 5827 } else col = in[j]; 5828 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5829 } 5830 } 5831 } else if (!aij->donotstash) { 5832 if (roworiented) { 5833 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5834 } else { 5835 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5836 } 5837 } 5838 } 5839 } 5840 PetscFunctionReturnVoid(); 5841 } 5842 5843