#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use the inode routines when enough inodes exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
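  /*
     The MATAIJ manual page above recommends calling both preallocation routines so that the
     same code runs on one process and in parallel.  A minimal sketch, assuming roughly nd
     nonzeros per row in the diagonal block and no per row elsewhere (A, M, N, nd, no and comm
     are illustrative placeholders, not names used in this file):

       Mat A;
       ierr = MatCreate(comm,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
       ierr = MatSeqAIJSetPreallocation(A,nd,NULL);CHKERRQ(ierr);           used when there is one process
       ierr = MatMPIAIJSetPreallocation(A,nd,NULL,no,NULL);CHKERRQ(ierr);   used when there are several
  */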
Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 113 114 PetscFunctionBegin; 115 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 116 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 117 } else { 118 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 119 } 120 PetscFunctionReturn(0); 121 } 122 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 126 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 127 { 128 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 129 PetscErrorCode ierr; 130 PetscInt i,rstart,nrows,*rows; 131 132 PetscFunctionBegin; 133 *zrows = NULL; 134 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 135 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 136 for (i=0; i<nrows; i++) rows[i] += rstart; 137 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 138 PetscFunctionReturn(0); 139 } 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 143 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 144 { 145 PetscErrorCode ierr; 146 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 147 PetscInt i,n,*garray = aij->garray; 148 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 149 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 150 PetscReal *work; 151 152 PetscFunctionBegin; 153 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 154 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 155 if (type == NORM_2) { 156 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 157 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 158 } 159 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 160 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 161 } 162 } else if (type == NORM_1) { 163 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 164 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 165 } 166 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 167 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 168 } 169 } else if (type == NORM_INFINITY) { 170 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 171 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 172 } 173 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 174 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 175 } 176 177 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 178 if (type == NORM_INFINITY) { 179 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 180 } else { 181 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 182 } 183 ierr = PetscFree(work);CHKERRQ(ierr); 184 if (type == NORM_2) { 185 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 186 } 187 PetscFunctionReturn(0); 188 } 189 190 #undef __FUNCT__ 191 #define __FUNCT__ "MatDistribute_MPIAIJ" 192 /* 193 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 194 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
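    With MAT_INITIAL_MATRIX the parallel layout, the preallocation and the column indices are
    built from scratch and the row lengths, indices and values are shipped from process 0;
    with MAT_REUSE_MATRIX the column indices of *inmat are assumed to be set already and only
    the numerical values are moved over from process 0.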
195 196 Only for square matrices 197 198 Used by a preconditioner, hence PETSC_EXTERN 199 */ 200 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 201 { 202 PetscMPIInt rank,size; 203 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 204 PetscErrorCode ierr; 205 Mat mat; 206 Mat_SeqAIJ *gmata; 207 PetscMPIInt tag; 208 MPI_Status status; 209 PetscBool aij; 210 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 211 212 PetscFunctionBegin; 213 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 214 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 215 if (!rank) { 216 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 217 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 218 } 219 if (reuse == MAT_INITIAL_MATRIX) { 220 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 221 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 222 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 223 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 224 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 225 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 226 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 227 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 228 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 229 230 rowners[0] = 0; 231 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 232 rstart = rowners[rank]; 233 rend = rowners[rank+1]; 234 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 235 if (!rank) { 236 gmata = (Mat_SeqAIJ*) gmat->data; 237 /* send row lengths to all processors */ 238 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 239 for (i=1; i<size; i++) { 240 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 241 } 242 /* determine number diagonal and off-diagonal counts */ 243 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 244 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 245 jj = 0; 246 for (i=0; i<m; i++) { 247 for (j=0; j<dlens[i]; j++) { 248 if (gmata->j[jj] < rstart) ld[i]++; 249 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 250 jj++; 251 } 252 } 253 /* send column indices to other processes */ 254 for (i=1; i<size; i++) { 255 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 256 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 257 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 258 } 259 260 /* send numerical values to other processes */ 261 for (i=1; i<size; i++) { 262 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 263 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 264 } 265 gmataa = gmata->a; 266 gmataj = gmata->j; 267 268 } else { 269 /* receive row lengths */ 270 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 271 /* receive column indices */ 272 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 273 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 274 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 275 /* determine number diagonal and off-diagonal counts */ 276 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 277 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 278 jj = 0; 279 for (i=0; i<m; i++) { 280 for (j=0; j<dlens[i]; j++) { 281 if 
(gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  holds an integer array whose length is the global number of columns, although lookups are fast).
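    As an illustration only (mirroring the lookups done later in MatSetValues_MPIAIJ() and
  MatGetValues_MPIAIJ(); gcol is a hypothetical global column index and col the resulting
  local column index in B, with col ending up negative when gcol is not present):

  #if defined(PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
      col--;                          both stores keep "local index + 1" so that 0 can mean absent
  #else
      col = aij->colmap[gcol] - 1;
  #endif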
368 */ 369 #undef __FUNCT__ 370 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 371 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 372 { 373 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 374 PetscErrorCode ierr; 375 PetscInt n = aij->B->cmap->n,i; 376 377 PetscFunctionBegin; 378 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 379 #if defined(PETSC_USE_CTABLE) 380 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 381 for (i=0; i<n; i++) { 382 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 383 } 384 #else 385 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 386 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 387 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 388 #endif 389 PetscFunctionReturn(0); 390 } 391 392 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 393 { \ 394 if (col <= lastcol1) low1 = 0; \ 395 else high1 = nrow1; \ 396 lastcol1 = col;\ 397 while (high1-low1 > 5) { \ 398 t = (low1+high1)/2; \ 399 if (rp1[t] > col) high1 = t; \ 400 else low1 = t; \ 401 } \ 402 for (_i=low1; _i<high1; _i++) { \ 403 if (rp1[_i] > col) break; \ 404 if (rp1[_i] == col) { \ 405 if (addv == ADD_VALUES) ap1[_i] += value; \ 406 else ap1[_i] = value; \ 407 goto a_noinsert; \ 408 } \ 409 } \ 410 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 411 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 412 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 413 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 414 N = nrow1++ - 1; a->nz++; high1++; \ 415 /* shift up all the later entries in this row */ \ 416 for (ii=N; ii>=_i; ii--) { \ 417 rp1[ii+1] = rp1[ii]; \ 418 ap1[ii+1] = ap1[ii]; \ 419 } \ 420 rp1[_i] = col; \ 421 ap1[_i] = value; \ 422 A->nonzerostate++;\ 423 a_noinsert: ; \ 424 ailen[row] = nrow1; \ 425 } 426 427 428 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 429 { \ 430 if (col <= lastcol2) low2 = 0; \ 431 else high2 = nrow2; \ 432 lastcol2 = col; \ 433 while (high2-low2 > 5) { \ 434 t = (low2+high2)/2; \ 435 if (rp2[t] > col) high2 = t; \ 436 else low2 = t; \ 437 } \ 438 for (_i=low2; _i<high2; _i++) { \ 439 if (rp2[_i] > col) break; \ 440 if (rp2[_i] == col) { \ 441 if (addv == ADD_VALUES) ap2[_i] += value; \ 442 else ap2[_i] = value; \ 443 goto b_noinsert; \ 444 } \ 445 } \ 446 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 447 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 448 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 449 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 450 N = nrow2++ - 1; b->nz++; high2++; \ 451 /* shift up all the later entries in this row */ \ 452 for (ii=N; ii>=_i; ii--) { \ 453 rp2[ii+1] = rp2[ii]; \ 454 ap2[ii+1] = ap2[ii]; \ 455 } \ 456 rp2[_i] = col; \ 457 ap2[_i] = value; \ 458 B->nonzerostate++; \ 459 b_noinsert: ; \ 460 bilen[row] = nrow2; \ 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 465 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 466 { 467 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 468 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = 
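/*
   MatSetValuesRow_MPIAIJ() below expects v[] to hold one complete locally owned row in
   global column order: first the off-diagonal entries whose columns lie to the left of
   the diagonal block, then the diagonal-block entries, then the remaining off-diagonal
   entries; the three slices are copied directly into B, A and B respectively.
*/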
(Mat_SeqAIJ*)mat->B->data; 469 PetscErrorCode ierr; 470 PetscInt l,*garray = mat->garray,diag; 471 472 PetscFunctionBegin; 473 /* code only works for square matrices A */ 474 475 /* find size of row to the left of the diagonal part */ 476 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 477 row = row - diag; 478 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 479 if (garray[b->j[b->i[row]+l]] > diag) break; 480 } 481 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 482 483 /* diagonal part */ 484 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 485 486 /* right of diagonal part */ 487 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 #undef __FUNCT__ 492 #define __FUNCT__ "MatSetValues_MPIAIJ" 493 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 494 { 495 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 496 PetscScalar value; 497 PetscErrorCode ierr; 498 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 499 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 500 PetscBool roworiented = aij->roworiented; 501 502 /* Some Variables required in the macro */ 503 Mat A = aij->A; 504 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 505 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 506 MatScalar *aa = a->a; 507 PetscBool ignorezeroentries = a->ignorezeroentries; 508 Mat B = aij->B; 509 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 510 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 511 MatScalar *ba = b->a; 512 513 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 514 PetscInt nonew; 515 MatScalar *ap1,*ap2; 516 517 PetscFunctionBegin; 518 for (i=0; i<m; i++) { 519 if (im[i] < 0) continue; 520 #if defined(PETSC_USE_DEBUG) 521 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 522 #endif 523 if (im[i] >= rstart && im[i] < rend) { 524 row = im[i] - rstart; 525 lastcol1 = -1; 526 rp1 = aj + ai[row]; 527 ap1 = aa + ai[row]; 528 rmax1 = aimax[row]; 529 nrow1 = ailen[row]; 530 low1 = 0; 531 high1 = nrow1; 532 lastcol2 = -1; 533 rp2 = bj + bi[row]; 534 ap2 = ba + bi[row]; 535 rmax2 = bimax[row]; 536 nrow2 = bilen[row]; 537 low2 = 0; 538 high2 = nrow2; 539 540 for (j=0; j<n; j++) { 541 if (roworiented) value = v[i*n+j]; 542 else value = v[i+j*m]; 543 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 544 if (in[j] >= cstart && in[j] < cend) { 545 col = in[j] - cstart; 546 nonew = a->nonew; 547 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 548 } else if (in[j] < 0) continue; 549 #if defined(PETSC_USE_DEBUG) 550 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 551 #endif 552 else { 553 if (mat->was_assembled) { 554 if (!aij->colmap) { 555 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 556 } 557 #if defined(PETSC_USE_CTABLE) 558 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 559 col--; 560 #else 561 col = aij->colmap[in[j]] - 1; 562 #endif 563 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 564 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 565 col = in[j]; 566 /* 
Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 567 B = aij->B; 568 b = (Mat_SeqAIJ*)B->data; 569 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 570 rp2 = bj + bi[row]; 571 ap2 = ba + bi[row]; 572 rmax2 = bimax[row]; 573 nrow2 = bilen[row]; 574 low2 = 0; 575 high2 = nrow2; 576 bm = aij->B->rmap->n; 577 ba = b->a; 578 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 579 } else col = in[j]; 580 nonew = b->nonew; 581 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 582 } 583 } 584 } else { 585 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 586 if (!aij->donotstash) { 587 mat->assembled = PETSC_FALSE; 588 if (roworiented) { 589 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 590 } else { 591 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 592 } 593 } 594 } 595 } 596 PetscFunctionReturn(0); 597 } 598 599 #undef __FUNCT__ 600 #define __FUNCT__ "MatGetValues_MPIAIJ" 601 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 602 { 603 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 604 PetscErrorCode ierr; 605 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 606 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 607 608 PetscFunctionBegin; 609 for (i=0; i<m; i++) { 610 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 611 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 612 if (idxm[i] >= rstart && idxm[i] < rend) { 613 row = idxm[i] - rstart; 614 for (j=0; j<n; j++) { 615 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 616 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 617 if (idxn[j] >= cstart && idxn[j] < cend) { 618 col = idxn[j] - cstart; 619 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 620 } else { 621 if (!aij->colmap) { 622 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 623 } 624 #if defined(PETSC_USE_CTABLE) 625 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 626 col--; 627 #else 628 col = aij->colmap[idxn[j]] - 1; 629 #endif 630 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 631 else { 632 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 633 } 634 } 635 } 636 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 637 } 638 PetscFunctionReturn(0); 639 } 640 641 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 642 643 #undef __FUNCT__ 644 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 645 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 646 { 647 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 648 PetscErrorCode ierr; 649 PetscInt nstash,reallocs; 650 InsertMode addv; 651 652 PetscFunctionBegin; 653 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 654 655 /* make sure all processors are either in INSERTMODE or 
ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 708 /* 709 if nonzero structure of submatrix B cannot change then we know that 710 no processor disassembled thus we can skip this stuff 711 */ 712 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 713 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 714 if (mat->was_assembled && !other_disassembled) { 715 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 716 } 717 } 718 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 719 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 720 } 721 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 722 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 723 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 724 725 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 726 727 aij->rowvalues = 0; 728 729 /* used by MatAXPY() */ 730 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 731 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 732 733 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 734 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 735 736 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 737 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 738 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 739 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 740 } 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 746 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 747 { 748 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 749 PetscErrorCode ierr; 750 751 PetscFunctionBegin; 752 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 753 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "MatZeroRows_MPIAIJ" 759 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 760 { 761 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 762 PetscInt *owners = A->rmap->range; 763 PetscInt n = A->rmap->n; 764 PetscMPIInt size = mat->size; 765 PetscSF sf; 766 PetscInt *lrows; 767 PetscSFNode *rrows; 768 PetscInt lastidx = -1, r, p = 0, len = 0; 769 PetscErrorCode ierr; 770 771 PetscFunctionBegin; 772 /* Create SF where leaves are input rows and roots are owned rows */ 773 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 774 for (r = 0; r < n; ++r) lrows[r] = -1; 775 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 776 for (r = 0; r < N; ++r) { 777 const PetscInt idx = rows[r]; 778 PetscBool found = PETSC_FALSE; 779 /* Trick for efficient searching for sorted rows */ 780 if (lastidx > idx) p = 0; 781 lastidx = idx; 782 for (; p < size; ++p) { 783 if (idx >= owners[p] && idx < owners[p+1]) { 784 rrows[r].rank = p; 785 rrows[r].index = rows[r] - owners[p]; 786 found = PETSC_TRUE; 787 break; 788 } 789 } 790 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 791 } 792 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 793 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 794 /* Collect flags for rows to be zeroed */ 795 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 796 ierr = PetscSFReduceEnd(sf, 
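/*
   In the star forest built above the roots are the locally owned rows and the leaves are
   the caller-supplied global row numbers.  After the reduction only owned rows that some
   process asked to have zeroed were written into lrows[]; all other entries keep their
   initial value of -1, which is what the compression loop below relies on.
*/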
MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 797 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 798 /* Compress and put in row numbers */ 799 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 800 /* fix right hand side if needed */ 801 if (x && b) { 802 const PetscScalar *xx; 803 PetscScalar *bb; 804 805 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 806 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 807 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 808 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 809 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 810 } 811 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 812 ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr); 813 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 814 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 815 } else if (diag != 0.0) { 816 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 817 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 818 for (r = 0; r < len; ++r) { 819 const PetscInt row = lrows[r] + A->rmap->rstart; 820 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 821 } 822 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 823 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 824 } else { 825 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 826 } 827 ierr = PetscFree(lrows);CHKERRQ(ierr); 828 829 /* only change matrix nonzero state if pattern was allowed to be changed */ 830 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 831 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 832 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 833 } 834 PetscFunctionReturn(0); 835 } 836 837 #undef __FUNCT__ 838 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 839 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 840 { 841 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 842 PetscErrorCode ierr; 843 PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1; 844 PetscInt i,j,r,m,p = 0,len = 0; 845 PetscInt *lrows,*owners = A->rmap->range; 846 PetscSFNode *rrows; 847 PetscSF sf; 848 const PetscScalar *xx; 849 PetscScalar *bb,*mask; 850 Vec xmask,lmask; 851 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 852 const PetscInt *aj, *ii,*ridx; 853 PetscScalar *aa; 854 #if defined(PETSC_DEBUG) 855 PetscBool found = PETSC_FALSE; 856 #endif 857 858 PetscFunctionBegin; 859 /* Create SF where leaves are input rows and roots are owned rows */ 860 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 861 for (r = 0; r < n; ++r) lrows[r] = -1; 862 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 863 for (r = 0; r < N; ++r) { 864 const PetscInt idx = rows[r]; 865 PetscBool found = PETSC_FALSE; 866 /* Trick for efficient searching for sorted rows */ 867 if (lastidx > idx) p = 0; 868 lastidx = idx; 869 for (; p < size; ++p) { 870 if (idx >= owners[p] && idx < owners[p+1]) { 871 rrows[r].rank = p; 872 rrows[r].index = rows[r] - owners[p]; 873 found = PETSC_TRUE; 874 break; 875 } 876 } 877 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 878 } 879 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 880 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 881 /* Collect flags for rows to be zeroed */ 882 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 883 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 884 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 885 /* Compress and put in row numbers */ 886 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 887 /* zero diagonal part of matrix */ 888 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 889 /* handle off diagonal part of matrix */ 890 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 891 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 892 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 893 for (i=0; i<len; i++) bb[lrows[i]] = 1; 894 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 895 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 897 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 898 if (x) { 899 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 900 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 901 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 902 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 903 } 904 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 905 /* remove zeroed rows of off diagonal matrix */ 906 ii = aij->i; 907 for (i=0; i<len; i++) { 908 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 909 } 910 /* loop over all elements of off process part of matrix zeroing removed columns*/ 911 if (aij->compressedrow.use) { 912 m = aij->compressedrow.nrows; 913 ii = aij->compressedrow.i; 914 ridx = aij->compressedrow.rindex; 915 for (i=0; i<m; i++) { 916 n = ii[i+1] - ii[i]; 917 aj = aij->j + ii[i]; 918 aa = aij->a + ii[i]; 919 920 for (j=0; j<n; j++) { 921 if (PetscAbsScalar(mask[*aj])) { 922 if (b) bb[*ridx] -= *aa*xx[*aj]; 923 *aa = 0.0; 924 } 925 aa++; 926 aj++; 927 } 928 ridx++; 929 } 930 } else { /* do not use compressed row format */ 931 m = l->B->rmap->n; 932 for (i=0; i<m; i++) { 933 n = ii[i+1] - ii[i]; 934 aj = aij->j + ii[i]; 935 aa = aij->a + ii[i]; 936 for (j=0; j<n; j++) { 937 if (PetscAbsScalar(mask[*aj])) { 938 if (b) bb[i] -= *aa*xx[*aj]; 939 *aa = 0.0; 940 } 941 aa++; 942 aj++; 943 } 944 } 945 } 946 if (x) { 947 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 948 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 949 } 950 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 951 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 952 ierr = PetscFree(lrows);CHKERRQ(ierr); 953 954 /* only change matrix nonzero state if pattern was allowed to be changed */ 955 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 956 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 957 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 958 } 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatMult_MPIAIJ" 964 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 965 { 966 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 967 PetscErrorCode ierr; 968 PetscInt nt; 969 970 PetscFunctionBegin; 971 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 972 if (nt != A->cmap->n) 
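/*
   Past the size check, the scatter that gathers the off-process entries of xx into
   a->lvec is started first, the product with the local diagonal block a->A is done while
   those messages are in flight, and only then is the scatter completed and the
   off-diagonal contribution of a->B added in, overlapping communication with computation.
*/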
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 973 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 974 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 975 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 976 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 977 PetscFunctionReturn(0); 978 } 979 980 #undef __FUNCT__ 981 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 982 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 983 { 984 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 985 PetscErrorCode ierr; 986 987 PetscFunctionBegin; 988 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatMultAdd_MPIAIJ" 994 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 995 { 996 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 997 PetscErrorCode ierr; 998 999 PetscFunctionBegin; 1000 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1002 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1003 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1009 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1010 { 1011 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1012 PetscErrorCode ierr; 1013 PetscBool merged; 1014 1015 PetscFunctionBegin; 1016 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1017 /* do nondiagonal part */ 1018 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1019 if (!merged) { 1020 /* send it on its way */ 1021 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1022 /* do local part */ 1023 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1024 /* receive remote parts: note this assumes the values are not actually */ 1025 /* added in yy until the next line, */ 1026 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1027 } else { 1028 /* do local part */ 1029 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1030 /* send it on its way */ 1031 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1032 /* values actually were received in the Begin() but we need to call this nop */ 1033 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1034 } 1035 PetscFunctionReturn(0); 1036 } 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1040 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1041 { 1042 MPI_Comm comm; 1043 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1044 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1045 IS Me,Notme; 1046 PetscErrorCode ierr; 1047 PetscInt M,N,first,last,*notme,i; 1048 PetscMPIInt size; 1049 1050 PetscFunctionBegin; 1051 /* Easy test: symmetric diagonal block */ 1052 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1053 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1054 if (!*f) PetscFunctionReturn(0); 1055 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1056 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1057 if (size == 1) PetscFunctionReturn(0); 1058 1059 /* Hard test: off-diagonal block. 
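      Each process extracts its locally owned rows restricted to the columns it does not
      own (A(me,notme)) and the corresponding block of B with the roles of rows and
      columns exchanged (B(notme,me)); the two sequential pieces are then compared
      entry by entry with MatIsTranspose().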
This takes a MatGetSubMatrix. */ 1060 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1061 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1062 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1063 for (i=0; i<first; i++) notme[i] = i; 1064 for (i=last; i<M; i++) notme[i-last+first] = i; 1065 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1066 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1067 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1068 Aoff = Aoffs[0]; 1069 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1070 Boff = Boffs[0]; 1071 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1072 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1073 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1074 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1075 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1076 ierr = PetscFree(notme);CHKERRQ(ierr); 1077 PetscFunctionReturn(0); 1078 } 1079 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1082 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1083 { 1084 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1085 PetscErrorCode ierr; 1086 1087 PetscFunctionBegin; 1088 /* do nondiagonal part */ 1089 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1090 /* send it on its way */ 1091 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1092 /* do local part */ 1093 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1094 /* receive remote parts */ 1095 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 /* 1100 This only works correctly for square matrices where the subblock A->A is the 1101 diagonal block 1102 */ 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1105 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1106 { 1107 PetscErrorCode ierr; 1108 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1109 1110 PetscFunctionBegin; 1111 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1112 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1113 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatScale_MPIAIJ" 1119 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1120 { 1121 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1122 PetscErrorCode ierr; 1123 1124 PetscFunctionBegin; 1125 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1126 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatDestroy_Redundant" 1132 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant) 1133 { 1134 PetscErrorCode ierr; 1135 Mat_Redundant *redund = *redundant; 1136 PetscInt i; 1137 1138 PetscFunctionBegin; 1139 *redundant = NULL; 1140 if (redund){ 1141 if (redund->matseq) { /* via MatGetSubMatrices() */ 1142 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 1143 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 1144 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 1145 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 1146 } else { 1147 ierr = 
PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 1148 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1149 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1150 for (i=0; i<redund->nrecvs; i++) { 1151 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1152 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1153 } 1154 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1155 } 1156 1157 if (redund->psubcomm) { 1158 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 1159 } 1160 ierr = PetscFree(redund);CHKERRQ(ierr); 1161 } 1162 PetscFunctionReturn(0); 1163 } 1164 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatDestroy_MPIAIJ" 1167 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1168 { 1169 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1170 PetscErrorCode ierr; 1171 1172 PetscFunctionBegin; 1173 #if defined(PETSC_USE_LOG) 1174 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1175 #endif 1176 ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr); 1177 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1178 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1179 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1180 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1181 #if defined(PETSC_USE_CTABLE) 1182 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1183 #else 1184 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1185 #endif 1186 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1187 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1188 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1189 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1190 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1191 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1192 1193 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1194 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1195 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1196 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1197 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1198 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1199 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1200 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1201 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1207 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1208 { 1209 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1210 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1211 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1212 PetscErrorCode ierr; 1213 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1214 int fd; 1215 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1216 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1217 PetscScalar *column_values; 1218 PetscInt message_count,flowcontrolcount; 1219 FILE *file; 1220 1221 PetscFunctionBegin; 1222 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1223 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1224 nz = A->nz + 
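/*
   The file is written entirely by process 0: under the PetscViewerFlowControl protocol
   rank 0 writes its own rows first and then receives and writes each other rank's row
   lengths, column indices and numerical values in turn, while the remaining ranks simply
   send their local pieces when their step comes up.
*/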
B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs a receive buffer as large as the largest number of local nonzeros on any process */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr =
PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1290 } 1291 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1292 } else { 1293 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1294 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1295 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1296 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1297 } 1298 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1299 1300 /* load up the local column values */ 1301 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1302 cnt = 0; 1303 for (i=0; i<mat->rmap->n; i++) { 1304 for (j=B->i[i]; j<B->i[i+1]; j++) { 1305 if (garray[B->j[j]] > cstart) break; 1306 column_values[cnt++] = B->a[j]; 1307 } 1308 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1309 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1310 } 1311 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1312 1313 /* store the column values to the file */ 1314 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1315 if (!rank) { 1316 MPI_Status status; 1317 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1318 for (i=1; i<size; i++) { 1319 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1320 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1321 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1322 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1323 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1324 } 1325 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1326 } else { 1327 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1328 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1329 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1330 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1331 } 1332 ierr = PetscFree(column_values);CHKERRQ(ierr); 1333 1334 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1335 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #include <petscdraw.h> 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1342 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1343 { 1344 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1345 PetscErrorCode ierr; 1346 PetscMPIInt rank = aij->rank,size = aij->size; 1347 PetscBool isdraw,iascii,isbinary; 1348 PetscViewer sviewer; 1349 PetscViewerFormat format; 1350 1351 PetscFunctionBegin; 1352 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1353 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1354 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1355 if (iascii) { 1356 ierr = 
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1357 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1358 MatInfo info; 1359 PetscBool inodes; 1360 1361 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1362 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1363 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1364 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1365 if (!inodes) { 1366 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1367 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1368 } else { 1369 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1370 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1371 } 1372 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1373 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1374 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1375 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1376 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1377 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1378 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1379 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1380 PetscFunctionReturn(0); 1381 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1382 PetscInt inodecount,inodelimit,*inodes; 1383 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1384 if (inodes) { 1385 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1386 } else { 1387 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1388 } 1389 PetscFunctionReturn(0); 1390 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1391 PetscFunctionReturn(0); 1392 } 1393 } else if (isbinary) { 1394 if (size == 1) { 1395 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1396 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1397 } else { 1398 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1399 } 1400 PetscFunctionReturn(0); 1401 } else if (isdraw) { 1402 PetscDraw draw; 1403 PetscBool isnull; 1404 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1405 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1406 } 1407 1408 { 1409 /* assemble the entire matrix onto first processor. 
*/ 1410 Mat A; 1411 Mat_SeqAIJ *Aloc; 1412 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1413 MatScalar *a; 1414 1415 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1416 if (!rank) { 1417 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1418 } else { 1419 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1420 } 1421 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1422 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1423 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1424 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1425 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1426 1427 /* copy over the A part */ 1428 Aloc = (Mat_SeqAIJ*)aij->A->data; 1429 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1430 row = mat->rmap->rstart; 1431 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1432 for (i=0; i<m; i++) { 1433 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1434 row++; 1435 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1436 } 1437 aj = Aloc->j; 1438 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1439 1440 /* copy over the B part */ 1441 Aloc = (Mat_SeqAIJ*)aij->B->data; 1442 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1443 row = mat->rmap->rstart; 1444 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1445 ct = cols; 1446 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1447 for (i=0; i<m; i++) { 1448 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1449 row++; 1450 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1451 } 1452 ierr = PetscFree(ct);CHKERRQ(ierr); 1453 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1454 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1455 /* 1456 Everyone has to call to draw the matrix since the graphics waits are 1457 synchronized across all processors that share the PetscDraw object 1458 */ 1459 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1460 if (!rank) { 1461 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1462 } 1463 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1464 ierr = MatDestroy(&A);CHKERRQ(ierr); 1465 } 1466 PetscFunctionReturn(0); 1467 } 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatView_MPIAIJ" 1471 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1472 { 1473 PetscErrorCode ierr; 1474 PetscBool iascii,isdraw,issocket,isbinary; 1475 1476 PetscFunctionBegin; 1477 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1478 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1479 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1480 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1481 if (iascii || isdraw || isbinary || issocket) { 1482 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1483 } 1484 PetscFunctionReturn(0); 1485 } 1486 1487 #undef __FUNCT__ 1488 #define __FUNCT__ "MatSOR_MPIAIJ" 1489 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1490 { 1491 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1492 PetscErrorCode ierr; 1493 Vec bb1 = 0; 1494 PetscBool hasop; 1495 1496 PetscFunctionBegin; 1497 if (flag == SOR_APPLY_UPPER) { 1498 ierr 
= (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } 1501 1502 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1503 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1504 } 1505 1506 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1507 if (flag & SOR_ZERO_INITIAL_GUESS) { 1508 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1509 its--; 1510 } 1511 1512 while (its--) { 1513 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1514 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1515 1516 /* update rhs: bb1 = bb - B*x */ 1517 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1518 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1519 1520 /* local sweep */ 1521 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1522 } 1523 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1524 if (flag & SOR_ZERO_INITIAL_GUESS) { 1525 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1526 its--; 1527 } 1528 while (its--) { 1529 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1530 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1531 1532 /* update rhs: bb1 = bb - B*x */ 1533 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1534 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1535 1536 /* local sweep */ 1537 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1538 } 1539 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1540 if (flag & SOR_ZERO_INITIAL_GUESS) { 1541 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1542 its--; 1543 } 1544 while (its--) { 1545 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1546 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1547 1548 /* update rhs: bb1 = bb - B*x */ 1549 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1550 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1551 1552 /* local sweep */ 1553 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1554 } 1555 } else if (flag & SOR_EISENSTAT) { 1556 Vec xx1; 1557 1558 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1559 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1560 1561 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1562 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1563 if (!mat->diag) { 1564 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1565 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1566 } 1567 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1568 if (hasop) { 1569 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1570 } else { 1571 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1572 } 1573 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1574 1575 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1576 1577 /* local sweep */ 1578 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1579 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1580 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1581 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1582 1583 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1584 PetscFunctionReturn(0); 1585 } 1586 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "MatPermute_MPIAIJ" 1589 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1590 { 1591 Mat aA,aB,Aperm; 1592 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1593 PetscScalar *aa,*ba; 1594 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1595 PetscSF rowsf,sf; 1596 IS parcolp = NULL; 1597 PetscBool done; 1598 PetscErrorCode ierr; 1599 1600 PetscFunctionBegin; 1601 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1602 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1603 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1604 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1605 1606 /* Invert row permutation to find out where my rows should go */ 1607 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1608 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1609 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1610 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1611 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1612 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1613 1614 /* Invert column permutation to find out where my columns should go */ 1615 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1616 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1617 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1618 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1619 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1620 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1621 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1622 1623 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1624 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1625 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1626 1627 /* Find out where my gcols should go */ 1628 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1629 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1630 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1631 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1632 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1633 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1634 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1635 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1636 1637 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1638 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1639 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1640 for (i=0; i<m; i++) { 1641 PetscInt row = rdest[i],rowner; 1642 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1643 for (j=ai[i]; j<ai[i+1]; j++) { 1644 PetscInt cowner,col = cdest[aj[j]]; 1645 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1646 if (rowner == cowner) dnnz[i]++; 1647 
else onnz[i]++; 1648 } 1649 for (j=bi[i]; j<bi[i+1]; j++) { 1650 PetscInt cowner,col = gcdest[bj[j]]; 1651 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1652 if (rowner == cowner) dnnz[i]++; 1653 else onnz[i]++; 1654 } 1655 } 1656 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1657 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1658 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1659 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1660 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1661 1662 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1663 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1664 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1665 for (i=0; i<m; i++) { 1666 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1667 PetscInt j0,rowlen; 1668 rowlen = ai[i+1] - ai[i]; 1669 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1670 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1671 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1672 } 1673 rowlen = bi[i+1] - bi[i]; 1674 for (j0=j=0; j<rowlen; j0=j) { 1675 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1676 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1677 } 1678 } 1679 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1680 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1681 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1682 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1683 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1684 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1685 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1686 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1687 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1688 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1689 *B = Aperm; 1690 PetscFunctionReturn(0); 1691 } 1692 1693 #undef __FUNCT__ 1694 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1695 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1696 { 1697 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1698 Mat A = mat->A,B = mat->B; 1699 PetscErrorCode ierr; 1700 PetscReal isend[5],irecv[5]; 1701 1702 PetscFunctionBegin; 1703 info->block_size = 1.0; 1704 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1705 1706 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1707 isend[3] = info->memory; isend[4] = info->mallocs; 1708 1709 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1710 1711 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1712 isend[3] += info->memory; isend[4] += info->mallocs; 1713 if (flag == MAT_LOCAL) { 1714 info->nz_used = isend[0]; 1715 info->nz_allocated = isend[1]; 1716 info->nz_unneeded = isend[2]; 1717 info->memory = isend[3]; 1718 info->mallocs = isend[4]; 1719 } else if (flag == MAT_GLOBAL_MAX) { 1720 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1721 1722 info->nz_used = irecv[0]; 1723 info->nz_allocated = irecv[1]; 1724 info->nz_unneeded = irecv[2]; 1725 info->memory = irecv[3]; 1726 info->mallocs = irecv[4]; 1727 } else 
if (flag == MAT_GLOBAL_SUM) { 1728 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1729 1730 info->nz_used = irecv[0]; 1731 info->nz_allocated = irecv[1]; 1732 info->nz_unneeded = irecv[2]; 1733 info->memory = irecv[3]; 1734 info->mallocs = irecv[4]; 1735 } 1736 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1737 info->fill_ratio_needed = 0; 1738 info->factor_mallocs = 0; 1739 PetscFunctionReturn(0); 1740 } 1741 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatSetOption_MPIAIJ" 1744 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1745 { 1746 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1747 PetscErrorCode ierr; 1748 1749 PetscFunctionBegin; 1750 switch (op) { 1751 case MAT_NEW_NONZERO_LOCATIONS: 1752 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1753 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1754 case MAT_KEEP_NONZERO_PATTERN: 1755 case MAT_NEW_NONZERO_LOCATION_ERR: 1756 case MAT_USE_INODES: 1757 case MAT_IGNORE_ZERO_ENTRIES: 1758 MatCheckPreallocated(A,1); 1759 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1760 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1761 break; 1762 case MAT_ROW_ORIENTED: 1763 a->roworiented = flg; 1764 1765 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1766 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1767 break; 1768 case MAT_NEW_DIAGONALS: 1769 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1770 break; 1771 case MAT_IGNORE_OFF_PROC_ENTRIES: 1772 a->donotstash = flg; 1773 break; 1774 case MAT_SPD: 1775 A->spd_set = PETSC_TRUE; 1776 A->spd = flg; 1777 if (flg) { 1778 A->symmetric = PETSC_TRUE; 1779 A->structurally_symmetric = PETSC_TRUE; 1780 A->symmetric_set = PETSC_TRUE; 1781 A->structurally_symmetric_set = PETSC_TRUE; 1782 } 1783 break; 1784 case MAT_SYMMETRIC: 1785 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1786 break; 1787 case MAT_STRUCTURALLY_SYMMETRIC: 1788 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1789 break; 1790 case MAT_HERMITIAN: 1791 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1792 break; 1793 case MAT_SYMMETRY_ETERNAL: 1794 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1795 break; 1796 default: 1797 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1798 } 1799 PetscFunctionReturn(0); 1800 } 1801 1802 #undef __FUNCT__ 1803 #define __FUNCT__ "MatGetRow_MPIAIJ" 1804 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1805 { 1806 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1807 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1808 PetscErrorCode ierr; 1809 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1810 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1811 PetscInt *cmap,*idx_p; 1812 1813 PetscFunctionBegin; 1814 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1815 mat->getrowactive = PETSC_TRUE; 1816 1817 if (!mat->rowvalues && (idx || v)) { 1818 /* 1819 allocate enough space to hold information from the longest row. 
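       Both the diagonal block (A) and the off-diagonal block (B) contribute entries to a
       row, so the loop below takes the maximum of (row length in A) + (row length in B)
       over all local rows and allocates mat->rowvalues and mat->rowindices once; later
       calls to MatGetRow() merge the two blocks into these buffers without allocating
       per row.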
1820 */ 1821 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1822 PetscInt max = 1,tmp; 1823 for (i=0; i<matin->rmap->n; i++) { 1824 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1825 if (max < tmp) max = tmp; 1826 } 1827 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1828 } 1829 1830 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1831 lrow = row - rstart; 1832 1833 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1834 if (!v) {pvA = 0; pvB = 0;} 1835 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1836 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1837 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1838 nztot = nzA + nzB; 1839 1840 cmap = mat->garray; 1841 if (v || idx) { 1842 if (nztot) { 1843 /* Sort by increasing column numbers, assuming A and B already sorted */ 1844 PetscInt imark = -1; 1845 if (v) { 1846 *v = v_p = mat->rowvalues; 1847 for (i=0; i<nzB; i++) { 1848 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1849 else break; 1850 } 1851 imark = i; 1852 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1853 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1854 } 1855 if (idx) { 1856 *idx = idx_p = mat->rowindices; 1857 if (imark > -1) { 1858 for (i=0; i<imark; i++) { 1859 idx_p[i] = cmap[cworkB[i]]; 1860 } 1861 } else { 1862 for (i=0; i<nzB; i++) { 1863 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1864 else break; 1865 } 1866 imark = i; 1867 } 1868 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1869 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1870 } 1871 } else { 1872 if (idx) *idx = 0; 1873 if (v) *v = 0; 1874 } 1875 } 1876 *nz = nztot; 1877 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1878 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1879 PetscFunctionReturn(0); 1880 } 1881 1882 #undef __FUNCT__ 1883 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1884 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1885 { 1886 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1887 1888 PetscFunctionBegin; 1889 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1890 aij->getrowactive = PETSC_FALSE; 1891 PetscFunctionReturn(0); 1892 } 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "MatNorm_MPIAIJ" 1896 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1897 { 1898 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1899 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1900 PetscErrorCode ierr; 1901 PetscInt i,j,cstart = mat->cmap->rstart; 1902 PetscReal sum = 0.0; 1903 MatScalar *v; 1904 1905 PetscFunctionBegin; 1906 if (aij->size == 1) { 1907 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1908 } else { 1909 if (type == NORM_FROBENIUS) { 1910 v = amat->a; 1911 for (i=0; i<amat->nz; i++) { 1912 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1913 } 1914 v = bmat->a; 1915 for (i=0; i<bmat->nz; i++) { 1916 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1917 } 1918 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1919 *norm = PetscSqrtReal(*norm); 1920 } else if (type == NORM_1) { /* max column norm */ 1921 PetscReal *tmp,*tmp2; 1922 PetscInt *jj,*garray = aij->garray; 1923 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1924 ierr = 
PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1925 *norm = 0.0; 1926 v = amat->a; jj = amat->j; 1927 for (j=0; j<amat->nz; j++) { 1928 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1929 } 1930 v = bmat->a; jj = bmat->j; 1931 for (j=0; j<bmat->nz; j++) { 1932 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1933 } 1934 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1935 for (j=0; j<mat->cmap->N; j++) { 1936 if (tmp2[j] > *norm) *norm = tmp2[j]; 1937 } 1938 ierr = PetscFree(tmp);CHKERRQ(ierr); 1939 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1940 } else if (type == NORM_INFINITY) { /* max row norm */ 1941 PetscReal ntemp = 0.0; 1942 for (j=0; j<aij->A->rmap->n; j++) { 1943 v = amat->a + amat->i[j]; 1944 sum = 0.0; 1945 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1946 sum += PetscAbsScalar(*v); v++; 1947 } 1948 v = bmat->a + bmat->i[j]; 1949 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1950 sum += PetscAbsScalar(*v); v++; 1951 } 1952 if (sum > ntemp) ntemp = sum; 1953 } 1954 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1955 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1956 } 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatTranspose_MPIAIJ" 1962 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1963 { 1964 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1965 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1966 PetscErrorCode ierr; 1967 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1968 PetscInt cstart = A->cmap->rstart,ncol; 1969 Mat B; 1970 MatScalar *array; 1971 1972 PetscFunctionBegin; 1973 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1974 1975 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1976 ai = Aloc->i; aj = Aloc->j; 1977 bi = Bloc->i; bj = Bloc->j; 1978 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1979 PetscInt *d_nnz,*g_nnz,*o_nnz; 1980 PetscSFNode *oloc; 1981 PETSC_UNUSED PetscSF sf; 1982 1983 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1984 /* compute d_nnz for preallocation */ 1985 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1986 for (i=0; i<ai[ma]; i++) { 1987 d_nnz[aj[i]]++; 1988 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1989 } 1990 /* compute local off-diagonal contributions */ 1991 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1992 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1993 /* map those to global */ 1994 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1995 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1996 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1997 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1998 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1999 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2000 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2001 2002 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2003 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2004 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2005 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2006 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2007 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2008 } else { 2009 B = *matout; 2010 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2011 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2012 } 2013 2014 /* copy over the A part */ 2015 array = Aloc->a; 2016 row = A->rmap->rstart; 2017 for (i=0; i<ma; i++) { 2018 ncol = ai[i+1]-ai[i]; 2019 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2020 row++; 2021 array += ncol; aj += ncol; 2022 } 2023 aj = Aloc->j; 2024 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2025 2026 /* copy over the B part */ 2027 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2028 array = Bloc->a; 2029 row = A->rmap->rstart; 2030 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2031 cols_tmp = cols; 2032 for (i=0; i<mb; i++) { 2033 ncol = bi[i+1]-bi[i]; 2034 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2035 row++; 2036 array += ncol; cols_tmp += ncol; 2037 } 2038 ierr = PetscFree(cols);CHKERRQ(ierr); 2039 2040 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2041 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2042 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2043 *matout = B; 2044 } else { 2045 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2046 } 2047 PetscFunctionReturn(0); 2048 } 2049 2050 #undef __FUNCT__ 2051 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2052 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2053 { 2054 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2055 Mat a = aij->A,b = aij->B; 2056 PetscErrorCode ierr; 2057 PetscInt s1,s2,s3; 2058 2059 PetscFunctionBegin; 2060 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2061 if (rr) { 2062 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2063 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2064 /* Overlap communication with computation. 
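       The scatter of rr into aij->lvec is only started here; the left scaling of the
       off-diagonal block and the scaling of the diagonal block proceed while the messages
       are in flight, and VecScatterEnd() is called just before the off-diagonal block
       needs the right-scaling values.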
*/ 2065 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2066 } 2067 if (ll) { 2068 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2069 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2070 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2071 } 2072 /* scale the diagonal block */ 2073 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2074 2075 if (rr) { 2076 /* Do a scatter end and then right scale the off-diagonal block */ 2077 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2078 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2079 } 2080 PetscFunctionReturn(0); 2081 } 2082 2083 #undef __FUNCT__ 2084 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2085 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2086 { 2087 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2088 PetscErrorCode ierr; 2089 2090 PetscFunctionBegin; 2091 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2092 PetscFunctionReturn(0); 2093 } 2094 2095 #undef __FUNCT__ 2096 #define __FUNCT__ "MatEqual_MPIAIJ" 2097 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2098 { 2099 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2100 Mat a,b,c,d; 2101 PetscBool flg; 2102 PetscErrorCode ierr; 2103 2104 PetscFunctionBegin; 2105 a = matA->A; b = matA->B; 2106 c = matB->A; d = matB->B; 2107 2108 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2109 if (flg) { 2110 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2111 } 2112 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2113 PetscFunctionReturn(0); 2114 } 2115 2116 #undef __FUNCT__ 2117 #define __FUNCT__ "MatCopy_MPIAIJ" 2118 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2119 { 2120 PetscErrorCode ierr; 2121 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2122 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2123 2124 PetscFunctionBegin; 2125 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2126 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2127 /* because of the column compression in the off-processor part of the matrix a->B, 2128 the number of columns in a->B and b->B may be different, hence we cannot call 2129 the MatCopy() directly on the two parts. If need be, we can provide a more 2130 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2131 then copying the submatrices */ 2132 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2133 } else { 2134 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2135 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2136 } 2137 PetscFunctionReturn(0); 2138 } 2139 2140 #undef __FUNCT__ 2141 #define __FUNCT__ "MatSetUp_MPIAIJ" 2142 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2143 { 2144 PetscErrorCode ierr; 2145 2146 PetscFunctionBegin; 2147 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 /* 2152 Computes the number of nonzeros per row needed for preallocation when X and Y 2153 have different nonzero structure. 
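   For each row the two sorted column lists are compared in global numbering (through
   xltog and yltog) and nnz[i] is set to the size of their union, counting a column that
   appears in both X and Y only once.  As an illustration (not taken from any particular
   matrix): if a row of X has global columns {1,4,7} and the same row of Y has {2,4,9},
   the union is {1,2,4,7,9} and nnz[i] = 5.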
2154 */ 2155 #undef __FUNCT__ 2156 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2157 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2158 { 2159 PetscInt i,j,k,nzx,nzy; 2160 2161 PetscFunctionBegin; 2162 /* Set the number of nonzeros in the new matrix */ 2163 for (i=0; i<m; i++) { 2164 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2165 nzx = xi[i+1] - xi[i]; 2166 nzy = yi[i+1] - yi[i]; 2167 nnz[i] = 0; 2168 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2169 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2170 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2171 nnz[i]++; 2172 } 2173 for (; k<nzy; k++) nnz[i]++; 2174 } 2175 PetscFunctionReturn(0); 2176 } 2177 2178 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2181 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2182 { 2183 PetscErrorCode ierr; 2184 PetscInt m = Y->rmap->N; 2185 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2186 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2187 2188 PetscFunctionBegin; 2189 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2190 PetscFunctionReturn(0); 2191 } 2192 2193 #undef __FUNCT__ 2194 #define __FUNCT__ "MatAXPY_MPIAIJ" 2195 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2196 { 2197 PetscErrorCode ierr; 2198 PetscInt i; 2199 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2200 PetscBLASInt bnz,one=1; 2201 Mat_SeqAIJ *x,*y; 2202 2203 PetscFunctionBegin; 2204 if (str == SAME_NONZERO_PATTERN) { 2205 PetscScalar alpha = a; 2206 x = (Mat_SeqAIJ*)xx->A->data; 2207 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2208 y = (Mat_SeqAIJ*)yy->A->data; 2209 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2210 x = (Mat_SeqAIJ*)xx->B->data; 2211 y = (Mat_SeqAIJ*)yy->B->data; 2212 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2213 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2214 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2215 } else if (str == SUBSET_NONZERO_PATTERN) { 2216 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2217 2218 x = (Mat_SeqAIJ*)xx->B->data; 2219 y = (Mat_SeqAIJ*)yy->B->data; 2220 if (y->xtoy && y->XtoY != xx->B) { 2221 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2222 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2223 } 2224 if (!y->xtoy) { /* get xtoy */ 2225 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2226 y->XtoY = xx->B; 2227 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2228 } 2229 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2230 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2231 } else { 2232 Mat B; 2233 PetscInt *nnz_d,*nnz_o; 2234 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2235 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2236 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2237 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2238 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2239 
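  /* B is preallocated with the union of the nonzero patterns of Y and X: nnz_d comes from
     the two diagonal blocks and nnz_o from the two off-diagonal blocks, whose local column
     numbers are translated through the garray local-to-global maps.  B is then filled by
     MatAXPY_BasicWithPreallocation() and replaces Y in place via MatHeaderReplace(). */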
ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2240 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2241 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2242 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2243 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2244 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2245 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2246 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2247 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2248 } 2249 PetscFunctionReturn(0); 2250 } 2251 2252 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2253 2254 #undef __FUNCT__ 2255 #define __FUNCT__ "MatConjugate_MPIAIJ" 2256 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2257 { 2258 #if defined(PETSC_USE_COMPLEX) 2259 PetscErrorCode ierr; 2260 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2261 2262 PetscFunctionBegin; 2263 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2264 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2265 #else 2266 PetscFunctionBegin; 2267 #endif 2268 PetscFunctionReturn(0); 2269 } 2270 2271 #undef __FUNCT__ 2272 #define __FUNCT__ "MatRealPart_MPIAIJ" 2273 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2274 { 2275 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2276 PetscErrorCode ierr; 2277 2278 PetscFunctionBegin; 2279 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2280 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2281 PetscFunctionReturn(0); 2282 } 2283 2284 #undef __FUNCT__ 2285 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2286 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2287 { 2288 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2289 PetscErrorCode ierr; 2290 2291 PetscFunctionBegin; 2292 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2293 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2294 PetscFunctionReturn(0); 2295 } 2296 2297 #if defined(PETSC_HAVE_PBGL) 2298 2299 #include <boost/parallel/mpi/bsp_process_group.hpp> 2300 #include <boost/graph/distributed/ilu_default_graph.hpp> 2301 #include <boost/graph/distributed/ilu_0_block.hpp> 2302 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2303 #include <boost/graph/distributed/petsc/interface.hpp> 2304 #include <boost/multi_array.hpp> 2305 #include <boost/parallel/distributed_property_map->hpp> 2306 2307 #undef __FUNCT__ 2308 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2309 /* 2310 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2311 */ 2312 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2313 { 2314 namespace petsc = boost::distributed::petsc; 2315 2316 namespace graph_dist = boost::graph::distributed; 2317 using boost::graph::distributed::ilu_default::process_group_type; 2318 using boost::graph::ilu_permuted; 2319 2320 PetscBool row_identity, col_identity; 2321 PetscContainer c; 2322 PetscInt m, n, M, N; 2323 PetscErrorCode ierr; 2324 2325 PetscFunctionBegin; 2326 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2327 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2328 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2329 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2330 2331 process_group_type pg; 2332 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2333 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, 
petsc::matrix_distribution(A, pg)); 2334 lgraph_type& level_graph = *lgraph_p; 2335 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2336 2337 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2338 ilu_permuted(level_graph); 2339 2340 /* put together the new matrix */ 2341 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2342 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2343 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2344 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2345 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2346 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2347 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2348 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2349 2350 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2351 ierr = PetscContainerSetPointer(c, lgraph_p); 2352 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2353 ierr = PetscContainerDestroy(&c); 2354 PetscFunctionReturn(0); 2355 } 2356 2357 #undef __FUNCT__ 2358 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2359 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2360 { 2361 PetscFunctionBegin; 2362 PetscFunctionReturn(0); 2363 } 2364 2365 #undef __FUNCT__ 2366 #define __FUNCT__ "MatSolve_MPIAIJ" 2367 /* 2368 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2369 */ 2370 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2371 { 2372 namespace graph_dist = boost::graph::distributed; 2373 2374 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2375 lgraph_type *lgraph_p; 2376 PetscContainer c; 2377 PetscErrorCode ierr; 2378 2379 PetscFunctionBegin; 2380 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2381 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2382 ierr = VecCopy(b, x);CHKERRQ(ierr); 2383 2384 PetscScalar *array_x; 2385 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2386 PetscInt sx; 2387 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2388 2389 PetscScalar *array_b; 2390 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2391 PetscInt sb; 2392 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2393 2394 lgraph_type& level_graph = *lgraph_p; 2395 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2396 2397 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2398 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2399 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2400 2401 typedef boost::iterator_property_map<array_ref_type::iterator, 2402 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2403 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2404 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2405 2406 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2407 PetscFunctionReturn(0); 2408 } 2409 #endif 2410 2411 2412 #undef __FUNCT__ 2413 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2414 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2415 { 2416 PetscMPIInt rank,size; 2417 MPI_Comm comm; 2418 PetscErrorCode ierr; 2419 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2420 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2421 PetscInt *rowrange = 
mat->rmap->range; 2422 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2423 Mat A = aij->A,B=aij->B,C=*matredundant; 2424 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2425 PetscScalar *sbuf_a; 2426 PetscInt nzlocal=a->nz+b->nz; 2427 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2428 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2429 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2430 MatScalar *aworkA,*aworkB; 2431 PetscScalar *vals; 2432 PetscMPIInt tag1,tag2,tag3,imdex; 2433 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2434 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2435 MPI_Status recv_status,*send_status; 2436 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2437 PetscInt **rbuf_j=NULL; 2438 PetscScalar **rbuf_a=NULL; 2439 Mat_Redundant *redund =NULL; 2440 2441 PetscFunctionBegin; 2442 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2443 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2444 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2445 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2446 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2447 2448 if (reuse == MAT_REUSE_MATRIX) { 2449 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2450 if (subsize == 1) { 2451 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2452 redund = c->redundant; 2453 } else { 2454 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2455 redund = c->redundant; 2456 } 2457 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2458 2459 nsends = redund->nsends; 2460 nrecvs = redund->nrecvs; 2461 send_rank = redund->send_rank; 2462 recv_rank = redund->recv_rank; 2463 sbuf_nz = redund->sbuf_nz; 2464 rbuf_nz = redund->rbuf_nz; 2465 sbuf_j = redund->sbuf_j; 2466 sbuf_a = redund->sbuf_a; 2467 rbuf_j = redund->rbuf_j; 2468 rbuf_a = redund->rbuf_a; 2469 } 2470 2471 if (reuse == MAT_INITIAL_MATRIX) { 2472 PetscInt nleftover,np_subcomm; 2473 2474 /* get the destination processors' id send_rank, nsends and nrecvs */ 2475 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2476 2477 np_subcomm = size/nsubcomm; 2478 nleftover = size - nsubcomm*np_subcomm; 2479 2480 /* block of codes below is specific for INTERLACED */ 2481 /* ------------------------------------------------*/ 2482 nsends = 0; nrecvs = 0; 2483 for (i=0; i<size; i++) { 2484 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2485 send_rank[nsends++] = i; 2486 recv_rank[nrecvs++] = i; 2487 } 2488 } 2489 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2490 i = size-nleftover-1; 2491 j = 0; 2492 while (j < nsubcomm - nleftover) { 2493 send_rank[nsends++] = i; 2494 i--; j++; 2495 } 2496 } 2497 2498 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2499 for (i=0; i<nleftover; i++) { 2500 recv_rank[nrecvs++] = size-nleftover+i; 2501 } 2502 } 2503 /*----------------------------------------------*/ 2504 2505 /* allocate sbuf_j, sbuf_a */ 2506 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2507 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2508 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2509 /* 2510 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2511 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2512 */ 
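  /* sbuf_j is laid out as (rend-rstart+1) row pointers followed by the nzlocal column
     indices of this process, and sbuf_a holds the matching nzlocal values; the packing
     loops below fill the buffers in exactly this order before they are sent to the
     processes listed in send_rank[]. */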
2513 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2514 2515 /* copy mat's local entries into the buffers */ 2516 if (reuse == MAT_INITIAL_MATRIX) { 2517 rownz_max = 0; 2518 rptr = sbuf_j; 2519 cols = sbuf_j + rend-rstart + 1; 2520 vals = sbuf_a; 2521 rptr[0] = 0; 2522 for (i=0; i<rend-rstart; i++) { 2523 row = i + rstart; 2524 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2525 ncols = nzA + nzB; 2526 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2527 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2528 /* load the column indices for this row into cols */ 2529 lwrite = 0; 2530 for (l=0; l<nzB; l++) { 2531 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2532 vals[lwrite] = aworkB[l]; 2533 cols[lwrite++] = ctmp; 2534 } 2535 } 2536 for (l=0; l<nzA; l++) { 2537 vals[lwrite] = aworkA[l]; 2538 cols[lwrite++] = cstart + cworkA[l]; 2539 } 2540 for (l=0; l<nzB; l++) { 2541 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2542 vals[lwrite] = aworkB[l]; 2543 cols[lwrite++] = ctmp; 2544 } 2545 } 2546 vals += ncols; 2547 cols += ncols; 2548 rptr[i+1] = rptr[i] + ncols; 2549 if (rownz_max < ncols) rownz_max = ncols; 2550 } 2551 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2552 } else { /* only copy matrix values into sbuf_a */ 2553 rptr = sbuf_j; 2554 vals = sbuf_a; 2555 rptr[0] = 0; 2556 for (i=0; i<rend-rstart; i++) { 2557 row = i + rstart; 2558 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2559 ncols = nzA + nzB; 2560 cworkB = b->j + b->i[i]; 2561 aworkA = a->a + a->i[i]; 2562 aworkB = b->a + b->i[i]; 2563 lwrite = 0; 2564 for (l=0; l<nzB; l++) { 2565 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2566 } 2567 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2568 for (l=0; l<nzB; l++) { 2569 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2570 } 2571 vals += ncols; 2572 rptr[i+1] = rptr[i] + ncols; 2573 } 2574 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2575 2576 /* send nzlocal to others, and recv other's nzlocal */ 2577 /*--------------------------------------------------*/ 2578 if (reuse == MAT_INITIAL_MATRIX) { 2579 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2580 2581 s_waits2 = s_waits3 + nsends; 2582 s_waits1 = s_waits2 + nsends; 2583 r_waits1 = s_waits1 + nsends; 2584 r_waits2 = r_waits1 + nrecvs; 2585 r_waits3 = r_waits2 + nrecvs; 2586 } else { 2587 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2588 2589 r_waits3 = s_waits3 + nsends; 2590 } 2591 2592 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2593 if (reuse == MAT_INITIAL_MATRIX) { 2594 /* get new tags to keep the communication clean */ 2595 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2596 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2597 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2598 2599 /* post receives of other's nzlocal */ 2600 for (i=0; i<nrecvs; i++) { 2601 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2602 } 2603 /* send nzlocal to others */ 2604 for (i=0; i<nsends; i++) { 2605 sbuf_nz[i] = nzlocal; 2606 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2607 } 2608 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2609 count = nrecvs; 2610 while (count) { 2611 ierr = 
MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2612 2613 recv_rank[imdex] = recv_status.MPI_SOURCE; 2614 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2615 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2616 2617 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2618 2619 rbuf_nz[imdex] += i + 2; 2620 2621 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2622 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2623 count--; 2624 } 2625 /* wait on sends of nzlocal */ 2626 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2627 /* send mat->i,j to others, and recv from other's */ 2628 /*------------------------------------------------*/ 2629 for (i=0; i<nsends; i++) { 2630 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2631 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2632 } 2633 /* wait on receives of mat->i,j */ 2634 /*------------------------------*/ 2635 count = nrecvs; 2636 while (count) { 2637 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2638 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2639 count--; 2640 } 2641 /* wait on sends of mat->i,j */ 2642 /*---------------------------*/ 2643 if (nsends) { 2644 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2645 } 2646 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2647 2648 /* post receives, send and receive mat->a */ 2649 /*----------------------------------------*/ 2650 for (imdex=0; imdex<nrecvs; imdex++) { 2651 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2652 } 2653 for (i=0; i<nsends; i++) { 2654 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2655 } 2656 count = nrecvs; 2657 while (count) { 2658 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2659 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2660 count--; 2661 } 2662 if (nsends) { 2663 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2664 } 2665 2666 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2667 2668 /* create redundant matrix */ 2669 /*-------------------------*/ 2670 if (reuse == MAT_INITIAL_MATRIX) { 2671 const PetscInt *range; 2672 PetscInt rstart_sub,rend_sub,mloc_sub; 2673 2674 /* compute rownz_max for preallocation */ 2675 for (imdex=0; imdex<nrecvs; imdex++) { 2676 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2677 rptr = rbuf_j[imdex]; 2678 for (i=0; i<j; i++) { 2679 ncols = rptr[i+1] - rptr[i]; 2680 if (rownz_max < ncols) rownz_max = ncols; 2681 } 2682 } 2683 2684 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2685 2686 /* get local size of redundant matrix 2687 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! 
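       With the interlaced layout each rank of subcomm takes over the rows of nsubcomm
       consecutive ranks of the original communicator: rstart_sub = range[nsubcomm*subrank]
       and rend_sub = range[nsubcomm*(subrank+1)] (or mat->rmap->N on the last rank of
       subcomm), so the redundant matrix gets mloc_sub = rend_sub - rstart_sub local rows.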
*/ 2688 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2689 rstart_sub = range[nsubcomm*subrank]; 2690 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2691 rend_sub = range[nsubcomm*(subrank+1)]; 2692 } else { 2693 rend_sub = mat->rmap->N; 2694 } 2695 mloc_sub = rend_sub - rstart_sub; 2696 2697 if (M == N) { 2698 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2699 } else { /* non-square matrix */ 2700 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2701 } 2702 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2703 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2704 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2705 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2706 } else { 2707 C = *matredundant; 2708 } 2709 2710 /* insert local matrix entries */ 2711 rptr = sbuf_j; 2712 cols = sbuf_j + rend-rstart + 1; 2713 vals = sbuf_a; 2714 for (i=0; i<rend-rstart; i++) { 2715 row = i + rstart; 2716 ncols = rptr[i+1] - rptr[i]; 2717 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2718 vals += ncols; 2719 cols += ncols; 2720 } 2721 /* insert received matrix entries */ 2722 for (imdex=0; imdex<nrecvs; imdex++) { 2723 rstart = rowrange[recv_rank[imdex]]; 2724 rend = rowrange[recv_rank[imdex]+1]; 2725 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2726 rptr = rbuf_j[imdex]; 2727 cols = rbuf_j[imdex] + rend-rstart + 1; 2728 vals = rbuf_a[imdex]; 2729 for (i=0; i<rend-rstart; i++) { 2730 row = i + rstart; 2731 ncols = rptr[i+1] - rptr[i]; 2732 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2733 vals += ncols; 2734 cols += ncols; 2735 } 2736 } 2737 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2738 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2739 2740 if (reuse == MAT_INITIAL_MATRIX) { 2741 *matredundant = C; 2742 2743 /* create a supporting struct and attach it to C for reuse */ 2744 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2745 if (subsize == 1) { 2746 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2747 c->redundant = redund; 2748 } else { 2749 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2750 c->redundant = redund; 2751 } 2752 2753 redund->nzlocal = nzlocal; 2754 redund->nsends = nsends; 2755 redund->nrecvs = nrecvs; 2756 redund->send_rank = send_rank; 2757 redund->recv_rank = recv_rank; 2758 redund->sbuf_nz = sbuf_nz; 2759 redund->rbuf_nz = rbuf_nz; 2760 redund->sbuf_j = sbuf_j; 2761 redund->sbuf_a = sbuf_a; 2762 redund->rbuf_j = rbuf_j; 2763 redund->rbuf_a = rbuf_a; 2764 redund->psubcomm = NULL; 2765 } 2766 PetscFunctionReturn(0); 2767 } 2768 2769 #undef __FUNCT__ 2770 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2771 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2772 { 2773 PetscErrorCode ierr; 2774 MPI_Comm comm; 2775 PetscMPIInt size,subsize; 2776 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2777 Mat_Redundant *redund=NULL; 2778 PetscSubcomm psubcomm=NULL; 2779 MPI_Comm subcomm_in=subcomm; 2780 Mat *matseq; 2781 IS isrow,iscol; 2782 2783 PetscFunctionBegin; 2784 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2785 if (reuse == MAT_INITIAL_MATRIX) { 2786 /* create psubcomm, then get subcomm */ 2787 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2788 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2789 if (nsubcomm < 1 || nsubcomm 
> size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2790 2791 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2792 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2793 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2794 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2795 subcomm = psubcomm->comm; 2796 } else { /* retrieve psubcomm and subcomm */ 2797 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2798 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2799 if (subsize == 1) { 2800 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2801 redund = c->redundant; 2802 } else { 2803 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2804 redund = c->redundant; 2805 } 2806 psubcomm = redund->psubcomm; 2807 } 2808 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2809 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2810 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */ 2811 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2812 if (subsize == 1) { 2813 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2814 c->redundant->psubcomm = psubcomm; 2815 } else { 2816 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2817 c->redundant->psubcomm = psubcomm ; 2818 } 2819 } 2820 PetscFunctionReturn(0); 2821 } 2822 } 2823 2824 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2825 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2826 if (reuse == MAT_INITIAL_MATRIX) { 2827 /* create a local sequential matrix matseq[0] */ 2828 mloc_sub = PETSC_DECIDE; 2829 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2830 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2831 rstart = rend - mloc_sub; 2832 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2833 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2834 } else { /* reuse == MAT_REUSE_MATRIX */ 2835 if (subsize == 1) { 2836 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2837 redund = c->redundant; 2838 } else { 2839 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2840 redund = c->redundant; 2841 } 2842 2843 isrow = redund->isrow; 2844 iscol = redund->iscol; 2845 matseq = redund->matseq; 2846 } 2847 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2848 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2849 2850 if (reuse == MAT_INITIAL_MATRIX) { 2851 /* create a supporting struct and attach it to C for reuse */ 2852 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2853 if (subsize == 1) { 2854 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2855 c->redundant = redund; 2856 } else { 2857 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2858 c->redundant = redund; 2859 } 2860 redund->isrow = isrow; 2861 redund->iscol = iscol; 2862 redund->matseq = matseq; 2863 redund->psubcomm = psubcomm; 2864 } 2865 PetscFunctionReturn(0); 2866 } 2867 2868 #undef __FUNCT__ 2869 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2870 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2871 { 2872 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2873 PetscErrorCode ierr; 2874 PetscInt i,*idxb = 0; 2875 PetscScalar *va,*vb; 2876 Vec vtmp; 2877 2878 
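  /* The row-wise maximum of |a_ij| is obtained in two passes: first over the diagonal
     block, shifting any returned column index by cmap->rstart into global numbering, then
     over the off-diagonal block into a local work vector, keeping the larger entry of the
     two and translating a winning off-diagonal index through a->garray. */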
PetscFunctionBegin;
2879   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2880   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2881   if (idx) {
2882     for (i=0; i<A->rmap->n; i++) {
2883       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2884     }
2885   }
2886
2887   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2888   if (idx) {
2889     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2890   }
2891   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2892   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2893
2894   for (i=0; i<A->rmap->n; i++) {
2895     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2896       va[i] = vb[i];
2897       if (idx) idx[i] = a->garray[idxb[i]];
2898     }
2899   }
2900
2901   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2902   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2903   ierr = PetscFree(idxb);CHKERRQ(ierr);
2904   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2905   PetscFunctionReturn(0);
2906 }
2907
2908 #undef __FUNCT__
2909 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2910 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2911 {
2912   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2913   PetscErrorCode ierr;
2914   PetscInt       i,*idxb = 0;
2915   PetscScalar    *va,*vb;
2916   Vec            vtmp;
2917
2918   PetscFunctionBegin;
2919   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2920   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2921   if (idx) {
2922     for (i=0; i<A->rmap->n; i++) {
2923       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2924     }
2925   }
2926
2927   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2928   if (idx) {
2929     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2930   }
2931   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2932   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2933
2934   for (i=0; i<A->rmap->n; i++) {
2935     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2936       va[i] = vb[i];
2937       if (idx) idx[i] = a->garray[idxb[i]];
2938     }
2939   }
2940
2941   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2942   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2943   ierr = PetscFree(idxb);CHKERRQ(ierr);
2944   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2945   PetscFunctionReturn(0);
2946 }
2947
2948 #undef __FUNCT__
2949 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2950 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2951 {
2952   Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
2953   PetscInt       n       = A->rmap->n;
2954   PetscInt       cstart  = A->cmap->rstart;
2955   PetscInt       *cmap   = mat->garray;
2956   PetscInt       *diagIdx, *offdiagIdx;
2957   Vec            diagV, offdiagV;
2958   PetscScalar    *a, *diagA, *offdiagA;
2959   PetscInt       r;
2960   PetscErrorCode ierr;
2961
2962   PetscFunctionBegin;
2963   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2964   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2965   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2966   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2967   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2968   ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2969   ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2970   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2971   for (r = 0; r < n; ++r) {
2972     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2973       a[r]   = diagA[r];
2974       idx[r] = cstart + diagIdx[r];
2975     } else {
2976       a[r]   = offdiagA[r];
2977       idx[r] = cmap[offdiagIdx[r]];
2978     }
2979   }
2980   ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2981   ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2982   ierr = VecRestoreArray(offdiagV,
&offdiagA);CHKERRQ(ierr); 2983 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2984 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2985 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2986 PetscFunctionReturn(0); 2987 } 2988 2989 #undef __FUNCT__ 2990 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2991 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2992 { 2993 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2994 PetscInt n = A->rmap->n; 2995 PetscInt cstart = A->cmap->rstart; 2996 PetscInt *cmap = mat->garray; 2997 PetscInt *diagIdx, *offdiagIdx; 2998 Vec diagV, offdiagV; 2999 PetscScalar *a, *diagA, *offdiagA; 3000 PetscInt r; 3001 PetscErrorCode ierr; 3002 3003 PetscFunctionBegin; 3004 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 3005 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3006 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3007 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3008 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3009 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3010 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3011 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3012 for (r = 0; r < n; ++r) { 3013 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3014 a[r] = diagA[r]; 3015 idx[r] = cstart + diagIdx[r]; 3016 } else { 3017 a[r] = offdiagA[r]; 3018 idx[r] = cmap[offdiagIdx[r]]; 3019 } 3020 } 3021 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3022 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3023 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3024 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3025 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3026 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3027 PetscFunctionReturn(0); 3028 } 3029 3030 #undef __FUNCT__ 3031 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3032 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3033 { 3034 PetscErrorCode ierr; 3035 Mat *dummy; 3036 3037 PetscFunctionBegin; 3038 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3039 *newmat = *dummy; 3040 ierr = PetscFree(dummy);CHKERRQ(ierr); 3041 PetscFunctionReturn(0); 3042 } 3043 3044 #undef __FUNCT__ 3045 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3046 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3047 { 3048 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3049 PetscErrorCode ierr; 3050 3051 PetscFunctionBegin; 3052 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3053 PetscFunctionReturn(0); 3054 } 3055 3056 #undef __FUNCT__ 3057 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3058 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3059 { 3060 PetscErrorCode ierr; 3061 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3062 3063 PetscFunctionBegin; 3064 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3065 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3066 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3067 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3068 PetscFunctionReturn(0); 3069 } 3070 3071 /* -------------------------------------------------------------------*/ 3072 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3073 MatGetRow_MPIAIJ, 3074 MatRestoreRow_MPIAIJ, 3075 MatMult_MPIAIJ, 3076 /* 4*/ MatMultAdd_MPIAIJ, 3077 MatMultTranspose_MPIAIJ, 3078 MatMultTransposeAdd_MPIAIJ, 3079 #if defined(PETSC_HAVE_PBGL) 3080 MatSolve_MPIAIJ, 3081 #else 3082 0, 3083 #endif 3084 0, 3085 
0, 3086 /*10*/ 0, 3087 0, 3088 0, 3089 MatSOR_MPIAIJ, 3090 MatTranspose_MPIAIJ, 3091 /*15*/ MatGetInfo_MPIAIJ, 3092 MatEqual_MPIAIJ, 3093 MatGetDiagonal_MPIAIJ, 3094 MatDiagonalScale_MPIAIJ, 3095 MatNorm_MPIAIJ, 3096 /*20*/ MatAssemblyBegin_MPIAIJ, 3097 MatAssemblyEnd_MPIAIJ, 3098 MatSetOption_MPIAIJ, 3099 MatZeroEntries_MPIAIJ, 3100 /*24*/ MatZeroRows_MPIAIJ, 3101 0, 3102 #if defined(PETSC_HAVE_PBGL) 3103 0, 3104 #else 3105 0, 3106 #endif 3107 0, 3108 0, 3109 /*29*/ MatSetUp_MPIAIJ, 3110 #if defined(PETSC_HAVE_PBGL) 3111 0, 3112 #else 3113 0, 3114 #endif 3115 0, 3116 0, 3117 0, 3118 /*34*/ MatDuplicate_MPIAIJ, 3119 0, 3120 0, 3121 0, 3122 0, 3123 /*39*/ MatAXPY_MPIAIJ, 3124 MatGetSubMatrices_MPIAIJ, 3125 MatIncreaseOverlap_MPIAIJ, 3126 MatGetValues_MPIAIJ, 3127 MatCopy_MPIAIJ, 3128 /*44*/ MatGetRowMax_MPIAIJ, 3129 MatScale_MPIAIJ, 3130 0, 3131 MatDiagonalSet_MPIAIJ, 3132 MatZeroRowsColumns_MPIAIJ, 3133 /*49*/ MatSetRandom_MPIAIJ, 3134 0, 3135 0, 3136 0, 3137 0, 3138 /*54*/ MatFDColoringCreate_MPIXAIJ, 3139 0, 3140 MatSetUnfactored_MPIAIJ, 3141 MatPermute_MPIAIJ, 3142 0, 3143 /*59*/ MatGetSubMatrix_MPIAIJ, 3144 MatDestroy_MPIAIJ, 3145 MatView_MPIAIJ, 3146 0, 3147 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3148 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3149 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3150 0, 3151 0, 3152 0, 3153 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3154 MatGetRowMinAbs_MPIAIJ, 3155 0, 3156 MatSetColoring_MPIAIJ, 3157 0, 3158 MatSetValuesAdifor_MPIAIJ, 3159 /*75*/ MatFDColoringApply_AIJ, 3160 0, 3161 0, 3162 0, 3163 MatFindZeroDiagonals_MPIAIJ, 3164 /*80*/ 0, 3165 0, 3166 0, 3167 /*83*/ MatLoad_MPIAIJ, 3168 0, 3169 0, 3170 0, 3171 0, 3172 0, 3173 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3174 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3175 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3176 MatPtAP_MPIAIJ_MPIAIJ, 3177 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3178 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3179 0, 3180 0, 3181 0, 3182 0, 3183 /*99*/ 0, 3184 0, 3185 0, 3186 MatConjugate_MPIAIJ, 3187 0, 3188 /*104*/MatSetValuesRow_MPIAIJ, 3189 MatRealPart_MPIAIJ, 3190 MatImaginaryPart_MPIAIJ, 3191 0, 3192 0, 3193 /*109*/0, 3194 MatGetRedundantMatrix_MPIAIJ, 3195 MatGetRowMin_MPIAIJ, 3196 0, 3197 0, 3198 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3199 0, 3200 0, 3201 0, 3202 0, 3203 /*119*/0, 3204 0, 3205 0, 3206 0, 3207 MatGetMultiProcBlock_MPIAIJ, 3208 /*124*/MatFindNonzeroRows_MPIAIJ, 3209 MatGetColumnNorms_MPIAIJ, 3210 MatInvertBlockDiagonal_MPIAIJ, 3211 0, 3212 MatGetSubMatricesParallel_MPIAIJ, 3213 /*129*/0, 3214 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3215 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3216 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3217 0, 3218 /*134*/0, 3219 0, 3220 0, 3221 0, 3222 0, 3223 /*139*/0, 3224 0, 3225 0, 3226 MatFDColoringSetUp_MPIXAIJ 3227 }; 3228 3229 /* ----------------------------------------------------------------------------------------*/ 3230 3231 #undef __FUNCT__ 3232 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3233 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3234 { 3235 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3236 PetscErrorCode ierr; 3237 3238 PetscFunctionBegin; 3239 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3240 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3241 PetscFunctionReturn(0); 3242 } 3243 3244 #undef __FUNCT__ 3245 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3246 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3247 { 3248 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3249 PetscErrorCode ierr; 3250 3251 PetscFunctionBegin; 3252 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3253 
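  /* the previously stored values of the off-diagonal block are restored next */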
ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3254 PetscFunctionReturn(0); 3255 } 3256 3257 #undef __FUNCT__ 3258 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3259 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3260 { 3261 Mat_MPIAIJ *b; 3262 PetscErrorCode ierr; 3263 3264 PetscFunctionBegin; 3265 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3266 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3267 b = (Mat_MPIAIJ*)B->data; 3268 3269 if (!B->preallocated) { 3270 /* Explicitly create 2 MATSEQAIJ matrices. */ 3271 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3272 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3273 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3274 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3275 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3276 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3277 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3278 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3279 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3280 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3281 } 3282 3283 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3284 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3285 B->preallocated = PETSC_TRUE; 3286 PetscFunctionReturn(0); 3287 } 3288 3289 #undef __FUNCT__ 3290 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3291 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3292 { 3293 Mat mat; 3294 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3295 PetscErrorCode ierr; 3296 3297 PetscFunctionBegin; 3298 *newmat = 0; 3299 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3300 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3301 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3302 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3303 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3304 a = (Mat_MPIAIJ*)mat->data; 3305 3306 mat->factortype = matin->factortype; 3307 mat->assembled = PETSC_TRUE; 3308 mat->insertmode = NOT_SET_VALUES; 3309 mat->preallocated = PETSC_TRUE; 3310 3311 a->size = oldmat->size; 3312 a->rank = oldmat->rank; 3313 a->donotstash = oldmat->donotstash; 3314 a->roworiented = oldmat->roworiented; 3315 a->rowindices = 0; 3316 a->rowvalues = 0; 3317 a->getrowactive = PETSC_FALSE; 3318 3319 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3320 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3321 3322 if (oldmat->colmap) { 3323 #if defined(PETSC_USE_CTABLE) 3324 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3325 #else 3326 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3327 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3328 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3329 #endif 3330 } else a->colmap = 0; 3331 if (oldmat->garray) { 3332 PetscInt len; 3333 len = oldmat->B->cmap->n; 3334 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3335 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3336 if (len) { ierr = 
PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3337 } else a->garray = 0; 3338 3339 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3340 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3341 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3342 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3343 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3344 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3345 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3346 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3347 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3348 *newmat = mat; 3349 PetscFunctionReturn(0); 3350 } 3351 3352 3353 3354 #undef __FUNCT__ 3355 #define __FUNCT__ "MatLoad_MPIAIJ" 3356 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3357 { 3358 PetscScalar *vals,*svals; 3359 MPI_Comm comm; 3360 PetscErrorCode ierr; 3361 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3362 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3363 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3364 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3365 PetscInt cend,cstart,n,*rowners,sizesset=1; 3366 int fd; 3367 PetscInt bs = 1; 3368 3369 PetscFunctionBegin; 3370 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3371 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3372 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3373 if (!rank) { 3374 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3375 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3376 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3377 } 3378 3379 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3380 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3381 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3382 3383 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3384 3385 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3386 M = header[1]; N = header[2]; 3387 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3388 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3389 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3390 3391 /* If global sizes are set, check if they are consistent with that given in the file */ 3392 if (sizesset) { 3393 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3394 } 3395 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3396 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3397 3398 /* determine ownership of all (block) rows */ 3399 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3400 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3401 else m = newMat->rmap->n; /* Set by user */ 3402 3403 
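  /*
     For example, with M = 10 global rows, bs = 1, and size = 3 processes, the
     PETSC_DECIDE formula above gives ranks 0, 1, and 2 local sizes m = 4, 3, and 3.
  */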
ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3404 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3405 3406 /* First process needs enough room for process with most rows */ 3407 if (!rank) { 3408 mmax = rowners[1]; 3409 for (i=2; i<=size; i++) { 3410 mmax = PetscMax(mmax, rowners[i]); 3411 } 3412 } else mmax = -1; /* unused, but compilers complain */ 3413 3414 rowners[0] = 0; 3415 for (i=2; i<=size; i++) { 3416 rowners[i] += rowners[i-1]; 3417 } 3418 rstart = rowners[rank]; 3419 rend = rowners[rank+1]; 3420 3421 /* distribute row lengths to all processors */ 3422 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3423 if (!rank) { 3424 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3425 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3426 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3427 for (j=0; j<m; j++) { 3428 procsnz[0] += ourlens[j]; 3429 } 3430 for (i=1; i<size; i++) { 3431 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3432 /* calculate the number of nonzeros on each processor */ 3433 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3434 procsnz[i] += rowlengths[j]; 3435 } 3436 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3437 } 3438 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3439 } else { 3440 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3441 } 3442 3443 if (!rank) { 3444 /* determine max buffer needed and allocate it */ 3445 maxnz = 0; 3446 for (i=0; i<size; i++) { 3447 maxnz = PetscMax(maxnz,procsnz[i]); 3448 } 3449 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3450 3451 /* read in my part of the matrix column indices */ 3452 nz = procsnz[0]; 3453 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3454 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3455 3456 /* read in every one elses and ship off */ 3457 for (i=1; i<size; i++) { 3458 nz = procsnz[i]; 3459 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3460 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3461 } 3462 ierr = PetscFree(cols);CHKERRQ(ierr); 3463 } else { 3464 /* determine buffer space needed for message */ 3465 nz = 0; 3466 for (i=0; i<m; i++) { 3467 nz += ourlens[i]; 3468 } 3469 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3470 3471 /* receive message of column indices*/ 3472 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3473 } 3474 3475 /* determine column ownership if matrix is not square */ 3476 if (N != M) { 3477 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3478 else n = newMat->cmap->n; 3479 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3480 cstart = cend - n; 3481 } else { 3482 cstart = rstart; 3483 cend = rend; 3484 n = cend - cstart; 3485 } 3486 3487 /* loop over local rows, determining number of off diagonal entries */ 3488 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3489 jj = 0; 3490 for (i=0; i<m; i++) { 3491 for (j=0; j<ourlens[i]; j++) { 3492 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3493 jj++; 3494 } 3495 } 3496 3497 for (i=0; i<m; i++) { 3498 ourlens[i] -= offlens[i]; 3499 } 3500 if (!sizesset) { 3501 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3502 } 3503 3504 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3505 3506 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3507 3508 for (i=0; i<m; i++) { 3509 ourlens[i] += offlens[i]; 3510 } 3511 3512 if 
(!rank) { 3513 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3514 3515 /* read in my part of the matrix numerical values */ 3516 nz = procsnz[0]; 3517 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3518 3519 /* insert into matrix */ 3520 jj = rstart; 3521 smycols = mycols; 3522 svals = vals; 3523 for (i=0; i<m; i++) { 3524 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3525 smycols += ourlens[i]; 3526 svals += ourlens[i]; 3527 jj++; 3528 } 3529 3530 /* read in other processors and ship out */ 3531 for (i=1; i<size; i++) { 3532 nz = procsnz[i]; 3533 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3534 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3535 } 3536 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3537 } else { 3538 /* receive numeric values */ 3539 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3540 3541 /* receive message of values*/ 3542 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3543 3544 /* insert into matrix */ 3545 jj = rstart; 3546 smycols = mycols; 3547 svals = vals; 3548 for (i=0; i<m; i++) { 3549 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3550 smycols += ourlens[i]; 3551 svals += ourlens[i]; 3552 jj++; 3553 } 3554 } 3555 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3556 ierr = PetscFree(vals);CHKERRQ(ierr); 3557 ierr = PetscFree(mycols);CHKERRQ(ierr); 3558 ierr = PetscFree(rowners);CHKERRQ(ierr); 3559 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3560 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3561 PetscFunctionReturn(0); 3562 } 3563 3564 #undef __FUNCT__ 3565 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3566 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3567 { 3568 PetscErrorCode ierr; 3569 IS iscol_local; 3570 PetscInt csize; 3571 3572 PetscFunctionBegin; 3573 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3574 if (call == MAT_REUSE_MATRIX) { 3575 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3576 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3577 } else { 3578 PetscInt cbs; 3579 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3580 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3581 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3582 } 3583 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3584 if (call == MAT_INITIAL_MATRIX) { 3585 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3586 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3587 } 3588 PetscFunctionReturn(0); 3589 } 3590 3591 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3592 #undef __FUNCT__ 3593 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3594 /* 3595 Not great since it makes two copies of the submatrix, first an SeqAIJ 3596 in local and then by concatenating the local matrices the end result. 3597 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3598 3599 Note: This requires a sequential iscol with all indices. 
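
      A typical caller (see MatGetSubMatrix_MPIAIJ() above) builds such an iscol by
      gathering the requested columns onto every process with
      ISAllGather(iscol,&iscol_local), so each process passes the complete list of
      global column indices.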
3600 */ 3601 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3602 { 3603 PetscErrorCode ierr; 3604 PetscMPIInt rank,size; 3605 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3606 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3607 PetscBool allcolumns, colflag; 3608 Mat M,Mreuse; 3609 MatScalar *vwork,*aa; 3610 MPI_Comm comm; 3611 Mat_SeqAIJ *aij; 3612 3613 PetscFunctionBegin; 3614 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3615 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3616 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3617 3618 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3619 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3620 if (colflag && ncol == mat->cmap->N) { 3621 allcolumns = PETSC_TRUE; 3622 } else { 3623 allcolumns = PETSC_FALSE; 3624 } 3625 if (call == MAT_REUSE_MATRIX) { 3626 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3627 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3628 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3629 } else { 3630 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3631 } 3632 3633 /* 3634 m - number of local rows 3635 n - number of columns (same on all processors) 3636 rstart - first row in new global matrix generated 3637 */ 3638 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3639 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3640 if (call == MAT_INITIAL_MATRIX) { 3641 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3642 ii = aij->i; 3643 jj = aij->j; 3644 3645 /* 3646 Determine the number of non-zeros in the diagonal and off-diagonal 3647 portions of the matrix in order to do correct preallocation 3648 */ 3649 3650 /* first get start and end of "diagonal" columns */ 3651 if (csize == PETSC_DECIDE) { 3652 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3653 if (mglobal == n) { /* square matrix */ 3654 nlocal = m; 3655 } else { 3656 nlocal = n/size + ((n % size) > rank); 3657 } 3658 } else { 3659 nlocal = csize; 3660 } 3661 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3662 rstart = rend - nlocal; 3663 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3664 3665 /* next, compute all the lengths */ 3666 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3667 olens = dlens + m; 3668 for (i=0; i<m; i++) { 3669 jend = ii[i+1] - ii[i]; 3670 olen = 0; 3671 dlen = 0; 3672 for (j=0; j<jend; j++) { 3673 if (*jj < rstart || *jj >= rend) olen++; 3674 else dlen++; 3675 jj++; 3676 } 3677 olens[i] = olen; 3678 dlens[i] = dlen; 3679 } 3680 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3681 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3682 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3683 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3684 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3685 ierr = PetscFree(dlens);CHKERRQ(ierr); 3686 } else { 3687 PetscInt ml,nl; 3688 3689 M = *newmat; 3690 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3691 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3692 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3693 /* 3694 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3695      rather than the slower MatSetValues().
3696   */
3697   M->was_assembled = PETSC_TRUE;
3698   M->assembled     = PETSC_FALSE;
3699   }
3700   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3701   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3702   ii   = aij->i;
3703   jj   = aij->j;
3704   aa   = aij->a;
3705   for (i=0; i<m; i++) {
3706     row   = rstart + i;
3707     nz    = ii[i+1] - ii[i];
3708     cwork = jj; jj += nz;
3709     vwork = aa; aa += nz;
3710     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3711   }
3712
3713   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3714   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3715   *newmat = M;
3716
3717   /* save submatrix used in processor for next request */
3718   if (call == MAT_INITIAL_MATRIX) {
3719     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3720     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3721   }
3722   PetscFunctionReturn(0);
3723 }
3724
3725 #undef __FUNCT__
3726 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3727 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3728 {
3729   PetscInt       m,cstart, cend,j,nnz,i,d;
3730   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3731   const PetscInt *JJ;
3732   PetscScalar    *values;
3733   PetscErrorCode ierr;
3734
3735   PetscFunctionBegin;
3736   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3737
3738   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3739   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3740   m      = B->rmap->n;
3741   cstart = B->cmap->rstart;
3742   cend   = B->cmap->rend;
3743   rstart = B->rmap->rstart;
3744
3745   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3746
3747 #if defined(PETSC_USE_DEBUG)
3748   for (i=0; i<m; i++) {
3749     nnz = Ii[i+1]- Ii[i];
3750     JJ  = J + Ii[i];
3751     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3752     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3753     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3754   }
3755 #endif
3756
3757   for (i=0; i<m; i++) {
3758     nnz     = Ii[i+1]- Ii[i];
3759     JJ      = J + Ii[i];
3760     nnz_max = PetscMax(nnz_max,nnz);
3761     d       = 0;
3762     for (j=0; j<nnz; j++) {
3763       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3764     }
3765     d_nnz[i] = d;
3766     o_nnz[i] = nnz - d;
3767   }
3768   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3769   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3770
3771   if (v) values = (PetscScalar*)v;
3772   else {
3773     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3774   }
3775
3776   for (i=0; i<m; i++) {
3777     ii   = i + rstart;
3778     nnz  = Ii[i+1]- Ii[i];
3779     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3780 } 3781 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3782 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3783 3784 if (!v) { 3785 ierr = PetscFree(values);CHKERRQ(ierr); 3786 } 3787 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3788 PetscFunctionReturn(0); 3789 } 3790 3791 #undef __FUNCT__ 3792 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3793 /*@ 3794 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3795 (the default parallel PETSc format). 3796 3797 Collective on MPI_Comm 3798 3799 Input Parameters: 3800 + B - the matrix 3801 . i - the indices into j for the start of each local row (starts with zero) 3802 . j - the column indices for each local row (starts with zero) 3803 - v - optional values in the matrix 3804 3805 Level: developer 3806 3807 Notes: 3808 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3809 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3810 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3811 3812 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3813 3814 The format which is used for the sparse matrix input, is equivalent to a 3815 row-major ordering.. i.e for the following matrix, the input data expected is 3816 as shown: 3817 3818 1 0 0 3819 2 0 3 P0 3820 ------- 3821 4 5 6 P1 3822 3823 Process0 [P0]: rows_owned=[0,1] 3824 i = {0,1,3} [size = nrow+1 = 2+1] 3825 j = {0,0,2} [size = nz = 6] 3826 v = {1,2,3} [size = nz = 6] 3827 3828 Process1 [P1]: rows_owned=[2] 3829 i = {0,3} [size = nrow+1 = 1+1] 3830 j = {0,1,2} [size = nz = 6] 3831 v = {4,5,6} [size = nz = 6] 3832 3833 .keywords: matrix, aij, compressed row, sparse, parallel 3834 3835 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3836 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3837 @*/ 3838 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3839 { 3840 PetscErrorCode ierr; 3841 3842 PetscFunctionBegin; 3843 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3844 PetscFunctionReturn(0); 3845 } 3846 3847 #undef __FUNCT__ 3848 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3849 /*@C 3850 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3851 (the default parallel PETSc format). For good matrix assembly performance 3852 the user should preallocate the matrix storage by setting the parameters 3853 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3854 performance can be increased by more than a factor of 50. 3855 3856 Collective on MPI_Comm 3857 3858 Input Parameters: 3859 + B - the matrix 3860 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3861 (same value is used for all local rows) 3862 . d_nnz - array containing the number of nonzeros in the various rows of the 3863 DIAGONAL portion of the local submatrix (possibly different for each row) 3864 or NULL, if d_nz is used to specify the nonzero structure. 3865 The size of this array is equal to the number of local rows, i.e 'm'. 
3866 For matrices that will be factored, you must leave room for (and set) 3867 the diagonal entry even if it is zero. 3868 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3869 submatrix (same value is used for all local rows). 3870 - o_nnz - array containing the number of nonzeros in the various rows of the 3871 OFF-DIAGONAL portion of the local submatrix (possibly different for 3872 each row) or NULL, if o_nz is used to specify the nonzero 3873 structure. The size of this array is equal to the number 3874 of local rows, i.e 'm'. 3875 3876 If the *_nnz parameter is given then the *_nz parameter is ignored 3877 3878 The AIJ format (also called the Yale sparse matrix format or 3879 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3880 storage. The stored row and column indices begin with zero. 3881 See Users-Manual: ch_mat for details. 3882 3883 The parallel matrix is partitioned such that the first m0 rows belong to 3884 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3885 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3886 3887 The DIAGONAL portion of the local submatrix of a processor can be defined 3888 as the submatrix which is obtained by extraction the part corresponding to 3889 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3890 first row that belongs to the processor, r2 is the last row belonging to 3891 the this processor, and c1-c2 is range of indices of the local part of a 3892 vector suitable for applying the matrix to. This is an mxn matrix. In the 3893 common case of a square matrix, the row and column ranges are the same and 3894 the DIAGONAL part is also square. The remaining portion of the local 3895 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3896 3897 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3898 3899 You can call MatGetInfo() to get information on how effective the preallocation was; 3900 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3901 You can also run with the option -info and look for messages with the string 3902 malloc in them to see if additional memory allocation was needed. 3903 3904 Example usage: 3905 3906 Consider the following 8x8 matrix with 34 non-zero values, that is 3907 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3908 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3909 as follows: 3910 3911 .vb 3912 1 2 0 | 0 3 0 | 0 4 3913 Proc0 0 5 6 | 7 0 0 | 8 0 3914 9 0 10 | 11 0 0 | 12 0 3915 ------------------------------------- 3916 13 0 14 | 15 16 17 | 0 0 3917 Proc1 0 18 0 | 19 20 21 | 0 0 3918 0 0 0 | 22 23 0 | 24 0 3919 ------------------------------------- 3920 Proc2 25 26 27 | 0 0 28 | 29 0 3921 30 0 0 | 31 32 33 | 0 34 3922 .ve 3923 3924 This can be represented as a collection of submatrices as: 3925 3926 .vb 3927 A B C 3928 D E F 3929 G H I 3930 .ve 3931 3932 Where the submatrices A,B,C are owned by proc0, D,E,F are 3933 owned by proc1, G,H,I are owned by proc2. 3934 3935 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3936 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3937 The 'M','N' parameters are 8,8, and have the same values on all procs. 3938 3939 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3940 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3941 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3942 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3943 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3944 matrix, ans [DF] as another SeqAIJ matrix. 3945 3946 When d_nz, o_nz parameters are specified, d_nz storage elements are 3947 allocated for every row of the local diagonal submatrix, and o_nz 3948 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3949 One way to choose d_nz and o_nz is to use the max nonzerors per local 3950 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3951 In this case, the values of d_nz,o_nz are: 3952 .vb 3953 proc0 : dnz = 2, o_nz = 2 3954 proc1 : dnz = 3, o_nz = 2 3955 proc2 : dnz = 1, o_nz = 4 3956 .ve 3957 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3958 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3959 for proc3. i.e we are using 12+15+10=37 storage locations to store 3960 34 values. 3961 3962 When d_nnz, o_nnz parameters are specified, the storage is specified 3963 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3964 In the above case the values for d_nnz,o_nnz are: 3965 .vb 3966 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3967 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3968 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3969 .ve 3970 Here the space allocated is sum of all the above values i.e 34, and 3971 hence pre-allocation is perfect. 3972 3973 Level: intermediate 3974 3975 .keywords: matrix, aij, compressed row, sparse, parallel 3976 3977 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3978 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3979 @*/ 3980 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3981 { 3982 PetscErrorCode ierr; 3983 3984 PetscFunctionBegin; 3985 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3986 PetscValidType(B,1); 3987 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3988 PetscFunctionReturn(0); 3989 } 3990 3991 #undef __FUNCT__ 3992 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3993 /*@ 3994 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3995 CSR format the local rows. 3996 3997 Collective on MPI_Comm 3998 3999 Input Parameters: 4000 + comm - MPI communicator 4001 . m - number of local rows (Cannot be PETSC_DECIDE) 4002 . n - This value should be the same as the local size used in creating the 4003 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4004 calculated if N is given) For square matrices n is almost always m. 4005 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4006 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4007 . i - row indices 4008 . j - column indices 4009 - a - matrix values 4010 4011 Output Parameter: 4012 . mat - the matrix 4013 4014 Level: intermediate 4015 4016 Notes: 4017 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4018 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4019 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4020 4021 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
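
    A minimal calling sketch (hypothetical local CSR arrays i, j, and a holding this
    process's rows, laid out as in the example below):

.vb
      Mat A;
      ierr = MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve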
4022 4023 The format which is used for the sparse matrix input, is equivalent to a 4024 row-major ordering.. i.e for the following matrix, the input data expected is 4025 as shown: 4026 4027 1 0 0 4028 2 0 3 P0 4029 ------- 4030 4 5 6 P1 4031 4032 Process0 [P0]: rows_owned=[0,1] 4033 i = {0,1,3} [size = nrow+1 = 2+1] 4034 j = {0,0,2} [size = nz = 6] 4035 v = {1,2,3} [size = nz = 6] 4036 4037 Process1 [P1]: rows_owned=[2] 4038 i = {0,3} [size = nrow+1 = 1+1] 4039 j = {0,1,2} [size = nz = 6] 4040 v = {4,5,6} [size = nz = 6] 4041 4042 .keywords: matrix, aij, compressed row, sparse, parallel 4043 4044 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4045 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4046 @*/ 4047 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4048 { 4049 PetscErrorCode ierr; 4050 4051 PetscFunctionBegin; 4052 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4053 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4054 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4055 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4056 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4057 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4058 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4059 PetscFunctionReturn(0); 4060 } 4061 4062 #undef __FUNCT__ 4063 #define __FUNCT__ "MatCreateAIJ" 4064 /*@C 4065 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4066 (the default parallel PETSc format). For good matrix assembly performance 4067 the user should preallocate the matrix storage by setting the parameters 4068 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4069 performance can be increased by more than a factor of 50. 4070 4071 Collective on MPI_Comm 4072 4073 Input Parameters: 4074 + comm - MPI communicator 4075 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4076 This value should be the same as the local size used in creating the 4077 y vector for the matrix-vector product y = Ax. 4078 . n - This value should be the same as the local size used in creating the 4079 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4080 calculated if N is given) For square matrices n is almost always m. 4081 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4082 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4083 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4084 (same value is used for all local rows) 4085 . d_nnz - array containing the number of nonzeros in the various rows of the 4086 DIAGONAL portion of the local submatrix (possibly different for each row) 4087 or NULL, if d_nz is used to specify the nonzero structure. 4088 The size of this array is equal to the number of local rows, i.e 'm'. 4089 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4090 submatrix (same value is used for all local rows). 4091 - o_nnz - array containing the number of nonzeros in the various rows of the 4092 OFF-DIAGONAL portion of the local submatrix (possibly different for 4093 each row) or NULL, if o_nz is used to specify the nonzero 4094 structure. 
The size of this array is equal to the number 4095 of local rows, i.e 'm'. 4096 4097 Output Parameter: 4098 . A - the matrix 4099 4100 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4101 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4102 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4103 4104 Notes: 4105 If the *_nnz parameter is given then the *_nz parameter is ignored 4106 4107 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4108 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4109 storage requirements for this matrix. 4110 4111 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4112 processor than it must be used on all processors that share the object for 4113 that argument. 4114 4115 The user MUST specify either the local or global matrix dimensions 4116 (possibly both). 4117 4118 The parallel matrix is partitioned across processors such that the 4119 first m0 rows belong to process 0, the next m1 rows belong to 4120 process 1, the next m2 rows belong to process 2 etc.. where 4121 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4122 values corresponding to [m x N] submatrix. 4123 4124 The columns are logically partitioned with the n0 columns belonging 4125 to 0th partition, the next n1 columns belonging to the next 4126 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4127 4128 The DIAGONAL portion of the local submatrix on any given processor 4129 is the submatrix corresponding to the rows and columns m,n 4130 corresponding to the given processor. i.e diagonal matrix on 4131 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4132 etc. The remaining portion of the local submatrix [m x (N-n)] 4133 constitute the OFF-DIAGONAL portion. The example below better 4134 illustrates this concept. 4135 4136 For a square global matrix we define each processor's diagonal portion 4137 to be its local rows and the corresponding columns (a square submatrix); 4138 each processor's off-diagonal portion encompasses the remainder of the 4139 local matrix (a rectangular submatrix). 4140 4141 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4142 4143 When calling this routine with a single process communicator, a matrix of 4144 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4145 type of communicator, use the construction mechanism: 4146 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4147 4148 By default, this format uses inodes (identical nodes) when possible. 4149 We search for consecutive rows with the same nonzero structure, thereby 4150 reusing matrix information to achieve increased efficiency. 4151 4152 Options Database Keys: 4153 + -mat_no_inode - Do not use inodes 4154 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4155 - -mat_aij_oneindex - Internally use indexing starting at 1 4156 rather than 0. Note that when calling MatSetValues(), 4157 the user still MUST index entries starting at 0! 4158 4159 4160 Example usage: 4161 4162 Consider the following 8x8 matrix with 34 non-zero values, that is 4163 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4164 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4165 as follows: 4166 4167 .vb 4168 1 2 0 | 0 3 0 | 0 4 4169 Proc0 0 5 6 | 7 0 0 | 8 0 4170 9 0 10 | 11 0 0 | 12 0 4171 ------------------------------------- 4172 13 0 14 | 15 16 17 | 0 0 4173 Proc1 0 18 0 | 19 20 21 | 0 0 4174 0 0 0 | 22 23 0 | 24 0 4175 ------------------------------------- 4176 Proc2 25 26 27 | 0 0 28 | 29 0 4177 30 0 0 | 31 32 33 | 0 34 4178 .ve 4179 4180 This can be represented as a collection of submatrices as: 4181 4182 .vb 4183 A B C 4184 D E F 4185 G H I 4186 .ve 4187 4188 Where the submatrices A,B,C are owned by proc0, D,E,F are 4189 owned by proc1, G,H,I are owned by proc2. 4190 4191 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4192 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4193 The 'M','N' parameters are 8,8, and have the same values on all procs. 4194 4195 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4196 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4197 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4198 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4199 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4200 matrix, ans [DF] as another SeqAIJ matrix. 4201 4202 When d_nz, o_nz parameters are specified, d_nz storage elements are 4203 allocated for every row of the local diagonal submatrix, and o_nz 4204 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4205 One way to choose d_nz and o_nz is to use the max nonzerors per local 4206 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4207 In this case, the values of d_nz,o_nz are: 4208 .vb 4209 proc0 : dnz = 2, o_nz = 2 4210 proc1 : dnz = 3, o_nz = 2 4211 proc2 : dnz = 1, o_nz = 4 4212 .ve 4213 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4214 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4215 for proc3. i.e we are using 12+15+10=37 storage locations to store 4216 34 values. 4217 4218 When d_nnz, o_nnz parameters are specified, the storage is specified 4219 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4220 In the above case the values for d_nnz,o_nnz are: 4221 .vb 4222 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4223 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4224 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4225 .ve 4226 Here the space allocated is sum of all the above values i.e 34, and 4227 hence pre-allocation is perfect. 
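
   With the layout above, a minimal sketch of the call on proc1 (3 local rows and
   columns of the 8x8 matrix, using the per-row counts just listed) could be
.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   d_nz and o_nz are passed as 0 here because they are ignored once the *_nnz arrays are given.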
4228 4229 Level: intermediate 4230 4231 .keywords: matrix, aij, compressed row, sparse, parallel 4232 4233 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4234 MPIAIJ, MatCreateMPIAIJWithArrays() 4235 @*/ 4236 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4237 { 4238 PetscErrorCode ierr; 4239 PetscMPIInt size; 4240 4241 PetscFunctionBegin; 4242 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4243 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4244 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4245 if (size > 1) { 4246 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4247 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4248 } else { 4249 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4250 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4251 } 4252 PetscFunctionReturn(0); 4253 } 4254 4255 #undef __FUNCT__ 4256 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4257 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4258 { 4259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4260 4261 PetscFunctionBegin; 4262 if (Ad) *Ad = a->A; 4263 if (Ao) *Ao = a->B; 4264 if (colmap) *colmap = a->garray; 4265 PetscFunctionReturn(0); 4266 } 4267 4268 #undef __FUNCT__ 4269 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4270 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4271 { 4272 PetscErrorCode ierr; 4273 PetscInt i; 4274 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4275 4276 PetscFunctionBegin; 4277 if (coloring->ctype == IS_COLORING_GLOBAL) { 4278 ISColoringValue *allcolors,*colors; 4279 ISColoring ocoloring; 4280 4281 /* set coloring for diagonal portion */ 4282 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4283 4284 /* set coloring for off-diagonal portion */ 4285 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4286 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4287 for (i=0; i<a->B->cmap->n; i++) { 4288 colors[i] = allcolors[a->garray[i]]; 4289 } 4290 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4291 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4292 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4293 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4294 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4295 ISColoringValue *colors; 4296 PetscInt *larray; 4297 ISColoring ocoloring; 4298 4299 /* set coloring for diagonal portion */ 4300 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4301 for (i=0; i<a->A->cmap->n; i++) { 4302 larray[i] = i + A->cmap->rstart; 4303 } 4304 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4305 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4306 for (i=0; i<a->A->cmap->n; i++) { 4307 colors[i] = coloring->colors[larray[i]]; 4308 } 4309 ierr = PetscFree(larray);CHKERRQ(ierr); 4310 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4311 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4312 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4313 4314 /* set coloring for off-diagonal portion */ 4315 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4316 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4317 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4318 for (i=0; i<a->B->cmap->n; i++) { 4319 colors[i] = coloring->colors[larray[i]]; 4320 } 4321 ierr = PetscFree(larray);CHKERRQ(ierr); 4322 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4323 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4324 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4325 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4326 PetscFunctionReturn(0); 4327 } 4328 4329 #undef __FUNCT__ 4330 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4331 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4332 { 4333 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4334 PetscErrorCode ierr; 4335 4336 PetscFunctionBegin; 4337 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4338 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4339 PetscFunctionReturn(0); 4340 } 4341 4342 #undef __FUNCT__ 4343 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4344 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4345 { 4346 PetscErrorCode ierr; 4347 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4348 PetscInt *indx; 4349 4350 PetscFunctionBegin; 4351 /* This routine will ONLY return MPIAIJ type matrix */ 4352 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4353 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4354 if (n == PETSC_DECIDE) { 4355 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4356 } 4357 /* Check sum(n) = N */ 4358 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4359 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4360 4361 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4362 rstart -= m; 4363 4364 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4365 for (i=0; i<m; i++) { 4366 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4367 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4368 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4369 } 4370 4371 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4372 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4373 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4374 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4375 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4376 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4377 PetscFunctionReturn(0); 4378 } 4379 4380 #undef __FUNCT__ 4381 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4382 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4383 { 4384 PetscErrorCode ierr; 4385 PetscInt m,N,i,rstart,nnz,Ii; 4386 PetscInt *indx; 4387 PetscScalar *values; 4388 4389 PetscFunctionBegin; 4390 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4391 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4392 for (i=0; i<m; i++) { 4393 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4394 Ii = i + rstart; 4395 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4396 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4397 } 4398 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4399 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4400 PetscFunctionReturn(0); 4401 } 4402 4403 #undef __FUNCT__ 4404 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4405 /*@ 4406 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4407 matrices from each processor 4408 4409 Collective on MPI_Comm 4410 4411 Input Parameters: 4412 + comm - the communicators the parallel matrix will live on 4413 . inmat - the input sequential matrices 4414 . n - number of local columns (or PETSC_DECIDE) 4415 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4416 4417 Output Parameter: 4418 . outmat - the parallel matrix generated 4419 4420 Level: advanced 4421 4422 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4423 4424 @*/ 4425 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4426 { 4427 PetscErrorCode ierr; 4428 PetscMPIInt size; 4429 4430 PetscFunctionBegin; 4431 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4432 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4433 if (size == 1) { 4434 if (scall == MAT_INITIAL_MATRIX) { 4435 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4436 } else { 4437 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4438 } 4439 } else { 4440 if (scall == MAT_INITIAL_MATRIX) { 4441 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4442 } 4443 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4444 } 4445 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4446 PetscFunctionReturn(0); 4447 } 4448 4449 #undef __FUNCT__ 4450 #define __FUNCT__ "MatFileSplit" 4451 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4452 { 4453 PetscErrorCode ierr; 4454 PetscMPIInt rank; 4455 PetscInt m,N,i,rstart,nnz; 4456 size_t len; 4457 const PetscInt *indx; 4458 PetscViewer out; 4459 char *name; 4460 Mat B; 4461 const PetscScalar *values; 4462 4463 PetscFunctionBegin; 4464 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4465 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4466 /* Should this be the type of the diagonal block of A? 
*/ 4467 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4468 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4469 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4470 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4471 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4472 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4473 for (i=0; i<m; i++) { 4474 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4475 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4476 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4477 } 4478 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4479 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4480 4481 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4482 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4483 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4484 sprintf(name,"%s.%d",outfile,rank); 4485 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4486 ierr = PetscFree(name);CHKERRQ(ierr); 4487 ierr = MatView(B,out);CHKERRQ(ierr); 4488 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4489 ierr = MatDestroy(&B);CHKERRQ(ierr); 4490 PetscFunctionReturn(0); 4491 } 4492 4493 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4494 #undef __FUNCT__ 4495 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4496 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4497 { 4498 PetscErrorCode ierr; 4499 Mat_Merge_SeqsToMPI *merge; 4500 PetscContainer container; 4501 4502 PetscFunctionBegin; 4503 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4504 if (container) { 4505 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4506 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4507 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4508 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4509 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4510 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4511 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4512 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4513 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4514 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4515 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4516 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4517 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4518 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4519 ierr = PetscFree(merge);CHKERRQ(ierr); 4520 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4521 } 4522 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4523 PetscFunctionReturn(0); 4524 } 4525 4526 #include <../src/mat/utils/freespace.h> 4527 #include <petscbt.h> 4528 4529 #undef __FUNCT__ 4530 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4531 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4532 { 4533 PetscErrorCode ierr; 4534 MPI_Comm comm; 4535 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4536 PetscMPIInt size,rank,taga,*len_s; 4537 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4538 PetscInt proc,m; 4539 PetscInt **buf_ri,**buf_rj; 4540 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4541 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4542 MPI_Request *s_waits,*r_waits; 4543 MPI_Status *status; 4544 MatScalar *aa=a->a; 4545 MatScalar **abuf_r,*ba_i; 4546 Mat_Merge_SeqsToMPI *merge; 4547 PetscContainer container; 4548 4549 PetscFunctionBegin; 4550 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4551 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4552 4553 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4554 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4555 4556 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4557 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4558 4559 bi = merge->bi; 4560 bj = merge->bj; 4561 buf_ri = merge->buf_ri; 4562 buf_rj = merge->buf_rj; 4563 4564 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4565 owners = merge->rowmap->range; 4566 len_s = merge->len_s; 4567 4568 /* send and recv matrix values */ 4569 /*-----------------------------*/ 4570 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4571 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4572 4573 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4574 for (proc=0,k=0; proc<size; proc++) { 4575 if (!len_s[proc]) continue; 4576 i = owners[proc]; 4577 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4578 k++; 4579 } 4580 4581 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4582 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4583 ierr = PetscFree(status);CHKERRQ(ierr); 4584 4585 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4586 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4587 4588 /* insert mat values of mpimat */ 4589 /*----------------------------*/ 4590 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4591 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4592 4593 for (k=0; k<merge->nrecv; k++) { 4594 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4595 nrows = *(buf_ri_k[k]); 4596 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4597 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4598 } 4599 4600 /* set values of ba */ 4601 m = merge->rowmap->n; 4602 for (i=0; i<m; i++) { 4603 arow = owners[rank] + i; 4604 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4605 bnzi = bi[i+1] - bi[i]; 4606 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4607 4608 /* add local non-zero vals of this proc's seqmat into ba */ 4609 anzi = ai[arow+1] - ai[arow]; 4610 aj = a->j + ai[arow]; 4611 aa = a->a + ai[arow]; 4612 nextaj = 0; 4613 for (j=0; nextaj<anzi; j++) { 4614 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4615 ba_i[j] += aa[nextaj++]; 4616 } 4617 } 4618 4619 /* add received vals into ba */ 4620 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4621 /* i-th row */ 4622 if (i == *nextrow[k]) { 4623 anzi = *(nextai[k]+1) - *nextai[k]; 4624 aj = buf_rj[k] + *(nextai[k]); 4625 aa = abuf_r[k] + *(nextai[k]); 4626 nextaj = 0; 4627 for (j=0; nextaj<anzi; j++) { 4628 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4629 ba_i[j] += aa[nextaj++]; 4630 } 4631 } 4632 nextrow[k]++; nextai[k]++; 4633 } 4634 } 4635 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4636 } 4637 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4638 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4639 4640 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4641 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4642 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4643 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4644 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4645 PetscFunctionReturn(0); 4646 } 4647 4648 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4649 4650 #undef __FUNCT__ 4651 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4652 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4653 { 4654 PetscErrorCode ierr; 4655 Mat B_mpi; 4656 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4657 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4658 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4659 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4660 PetscInt len,proc,*dnz,*onz,bs,cbs; 4661 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4662 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4663 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4664 MPI_Status *status; 4665 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4666 PetscBT lnkbt; 4667 Mat_Merge_SeqsToMPI *merge; 4668 PetscContainer container; 4669 4670 PetscFunctionBegin; 4671 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4672 4673 /* make sure it is a PETSc comm */ 4674 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4675 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4676 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4677 4678 ierr = PetscNew(&merge);CHKERRQ(ierr); 4679 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4680 4681 /* determine row ownership */ 4682 /*---------------------------------------------------------*/ 4683 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4684 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4685 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4686 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4687 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4688 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4689 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4690 4691 m = merge->rowmap->n; 4692 owners = merge->rowmap->range; 4693 4694 /* determine the number of messages to send, their lengths */ 4695 /*---------------------------------------------------------*/ 4696 len_s = merge->len_s; 4697 4698 len = 0; /* length of buf_si[] */ 4699 merge->nsend = 0; 4700 for (proc=0; proc<size; proc++) { 4701 len_si[proc] = 0; 4702 if (proc == rank) { 4703 len_s[proc] = 0; 4704 } else { 4705 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4706 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4707 } 4708 if (len_s[proc]) { 4709 merge->nsend++; 4710 nrows = 0; 4711 for (i=owners[proc]; i<owners[proc+1]; i++) { 4712 if (ai[i+1] > ai[i]) nrows++; 4713 } 4714 len_si[proc] = 2*(nrows+1); 4715 len += len_si[proc]; 4716 } 4717 } 4718 4719 /* determine the number and length of messages to receive for ij-structure */ 4720 /*-------------------------------------------------------------------------*/ 4721 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4722 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4723 4724 /* post the Irecv of j-structure */ 4725 /*-------------------------------*/ 4726 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4727 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4728 4729 /* post the Isend of j-structure */ 4730 /*--------------------------------*/ 4731 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4732 4733 for (proc=0, k=0; proc<size; proc++) { 4734 if (!len_s[proc]) continue; 4735 i = owners[proc]; 4736 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4737 k++; 4738 } 4739 4740 /* receives and sends of j-structure are complete */ 4741 /*------------------------------------------------*/ 4742 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4743 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4744 4745 /* send and recv i-structure */ 4746 /*---------------------------*/ 4747 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4748 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4749 4750 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4751 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4752 for (proc=0,k=0; proc<size; proc++) { 4753 if (!len_s[proc]) continue; 4754 /* form outgoing message for i-structure: 4755 buf_si[0]: nrows to be sent 4756 [1:nrows]: row index (global) 4757 [nrows+1:2*nrows+1]: i-structure index 4758 */ 4759 /*-------------------------------------------*/ 4760 nrows = len_si[proc]/2 - 1; 4761 buf_si_i = buf_si + nrows+1; 4762 buf_si[0] = nrows; 4763 buf_si_i[0] = 0; 4764 nrows = 0; 4765 for (i=owners[proc]; i<owners[proc+1]; i++) { 4766 anzi = ai[i+1] - ai[i]; 4767 if (anzi) { 4768 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4769 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4770 nrows++; 4771 } 4772 } 4773 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4774 k++; 4775 buf_si += len_si[proc]; 4776 } 4777 4778 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4779 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4780 4781 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4782 for (i=0; i<merge->nrecv; i++) { 4783 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4784 } 4785 4786 ierr = PetscFree(len_si);CHKERRQ(ierr); 4787 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4788 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4789 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4790 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4791 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4792 ierr = PetscFree(status);CHKERRQ(ierr); 4793 4794 /* compute a local seq matrix in each processor */ 4795 /*----------------------------------------------*/ 4796 /* allocate bi array and free space for accumulating nonzero column info */ 4797 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4798 bi[0] = 0; 4799 4800 /* create and initialize a linked list */ 4801 nlnk = N+1; 4802 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4803 4804 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4805 len = ai[owners[rank+1]] - ai[owners[rank]]; 4806 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4807 4808 current_space = free_space; 4809 4810 /* determine symbolic info for each local row */ 4811 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4812 4813 for (k=0; k<merge->nrecv; k++) { 4814 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 4815 nrows = *buf_ri_k[k]; 4816 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 4817 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */ 4818 } 4819 4820 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4821 len = 0; 4822 for (i=0; i<m; i++) { 4823 bnzi = 0; 4824 /* add local non-zero cols of this proc's seqmat into lnk */ 4825 arow = owners[rank] + i; 4826 anzi = ai[arow+1] - ai[arow]; 4827 aj = a->j + ai[arow]; 4828 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4829 bnzi += nlnk; 4830 /* add received col data into lnk */ 4831 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4832 if (i == *nextrow[k]) { /* i-th row */ 4833 anzi = *(nextai[k]+1) - *nextai[k]; 4834 aj = buf_rj[k] + *nextai[k]; 4835 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4836 bnzi += nlnk; 4837 nextrow[k]++; nextai[k]++; 4838 } 4839 } 4840 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4841 4842 /* if free space is not available, make more free space */ 4843 if (current_space->local_remaining<bnzi) { 4844 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4845 nspacedouble++; 4846 } 4847 /* copy data into free space, then initialize lnk */ 4848 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4849 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4850 4851 current_space->array += bnzi; 4852 current_space->local_used += bnzi; 4853 current_space->local_remaining -= bnzi; 4854 4855 bi[i+1] = bi[i] + bnzi; 4856 } 4857 4858 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4859 4860 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4861 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4862 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4863 4864 /* create symbolic parallel matrix B_mpi */ 4865 /*---------------------------------------*/ 4866 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4867 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4868 if (n==PETSC_DECIDE) { 4869 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4870 } else { 4871 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4872 } 4873 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4874 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4875 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4876 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4877 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4878 4879 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4880 B_mpi->assembled = PETSC_FALSE; 4881 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4882 merge->bi = bi; 4883 merge->bj = bj; 4884 merge->buf_ri = buf_ri; 4885 merge->buf_rj = buf_rj; 4886 merge->coi = NULL; 4887 merge->coj = NULL; 4888 merge->owners_co = NULL; 4889 4890 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4891 4892 /* attach the supporting struct to B_mpi for reuse */ 4893 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4894 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4895 ierr = 
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4896 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4897 *mpimat = B_mpi; 4898 4899 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4900 PetscFunctionReturn(0); 4901 } 4902 4903 #undef __FUNCT__ 4904 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4905 /*@C 4906 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential 4907 matrices from each processor 4908 4909 Collective on MPI_Comm 4910 4911 Input Parameters: 4912 + comm - the communicator the parallel matrix will live on 4913 . seqmat - the input sequential matrix 4914 . m - number of local rows (or PETSC_DECIDE) 4915 . n - number of local columns (or PETSC_DECIDE) 4916 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4917 4918 Output Parameter: 4919 . mpimat - the parallel matrix generated 4920 4921 Level: advanced 4922 4923 Notes: 4924 The dimensions of the sequential matrix in each processor MUST be the same. 4925 The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be 4926 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4927 @*/ 4928 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4929 { 4930 PetscErrorCode ierr; 4931 PetscMPIInt size; 4932 4933 PetscFunctionBegin; 4934 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4935 if (size == 1) { 4936 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4937 if (scall == MAT_INITIAL_MATRIX) { 4938 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4939 } else { 4940 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4941 } 4942 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4943 PetscFunctionReturn(0); 4944 } 4945 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4946 if (scall == MAT_INITIAL_MATRIX) { 4947 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4948 } 4949 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4950 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4951 PetscFunctionReturn(0); 4952 } 4953 4954 #undef __FUNCT__ 4955 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4956 /*@ 4957 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4958 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4959 with MatGetSize() 4960 4961 Not Collective 4962 4963 Input Parameters: 4964 + A - the matrix 4965 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4966 4967 Output Parameter: 4968 . 
A_loc - the local sequential matrix generated 4969 4970 Level: developer 4971 4972 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4973 4974 @*/ 4975 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4976 { 4977 PetscErrorCode ierr; 4978 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4979 Mat_SeqAIJ *mat,*a,*b; 4980 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4981 MatScalar *aa,*ba,*cam; 4982 PetscScalar *ca; 4983 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4984 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4985 PetscBool match; 4986 4987 PetscFunctionBegin; 4988 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4989 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4990 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4991 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4992 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4993 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4994 aa = a->a; ba = b->a; 4995 if (scall == MAT_INITIAL_MATRIX) { 4996 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4997 ci[0] = 0; 4998 for (i=0; i<am; i++) { 4999 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5000 } 5001 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 5002 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 5003 k = 0; 5004 for (i=0; i<am; i++) { 5005 ncols_o = bi[i+1] - bi[i]; 5006 ncols_d = ai[i+1] - ai[i]; 5007 /* off-diagonal portion of A */ 5008 for (jo=0; jo<ncols_o; jo++) { 5009 col = cmap[*bj]; 5010 if (col >= cstart) break; 5011 cj[k] = col; bj++; 5012 ca[k++] = *ba++; 5013 } 5014 /* diagonal portion of A */ 5015 for (j=0; j<ncols_d; j++) { 5016 cj[k] = cstart + *aj++; 5017 ca[k++] = *aa++; 5018 } 5019 /* off-diagonal portion of A */ 5020 for (j=jo; j<ncols_o; j++) { 5021 cj[k] = cmap[*bj++]; 5022 ca[k++] = *ba++; 5023 } 5024 } 5025 /* put together the new matrix */ 5026 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5027 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5028 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5029 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5030 mat->free_a = PETSC_TRUE; 5031 mat->free_ij = PETSC_TRUE; 5032 mat->nonew = 0; 5033 } else if (scall == MAT_REUSE_MATRIX) { 5034 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5035 ci = mat->i; cj = mat->j; cam = mat->a; 5036 for (i=0; i<am; i++) { 5037 /* off-diagonal portion of A */ 5038 ncols_o = bi[i+1] - bi[i]; 5039 for (jo=0; jo<ncols_o; jo++) { 5040 col = cmap[*bj]; 5041 if (col >= cstart) break; 5042 *cam++ = *ba++; bj++; 5043 } 5044 /* diagonal portion of A */ 5045 ncols_d = ai[i+1] - ai[i]; 5046 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5047 /* off-diagonal portion of A */ 5048 for (j=jo; j<ncols_o; j++) { 5049 *cam++ = *ba++; bj++; 5050 } 5051 } 5052 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5053 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5054 PetscFunctionReturn(0); 5055 } 5056 5057 #undef __FUNCT__ 5058 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5059 /*@C 5060 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5061 5062 Not Collective 5063 5064 Input Parameters: 5065 + A - the matrix 5066 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5067 - row, col - index sets of rows and columns to extract (or NULL) 5068 5069 Output Parameter: 5070 . 
A_loc - the local sequential matrix generated 5071 5072 Level: developer 5073 5074 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5075 5076 @*/ 5077 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5078 { 5079 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5080 PetscErrorCode ierr; 5081 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5082 IS isrowa,iscola; 5083 Mat *aloc; 5084 PetscBool match; 5085 5086 PetscFunctionBegin; 5087 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5088 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5089 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5090 if (!row) { 5091 start = A->rmap->rstart; end = A->rmap->rend; 5092 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5093 } else { 5094 isrowa = *row; 5095 } 5096 if (!col) { 5097 start = A->cmap->rstart; 5098 cmap = a->garray; 5099 nzA = a->A->cmap->n; 5100 nzB = a->B->cmap->n; 5101 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5102 ncols = 0; 5103 for (i=0; i<nzB; i++) { 5104 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5105 else break; 5106 } 5107 imark = i; 5108 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5109 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5110 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5111 } else { 5112 iscola = *col; 5113 } 5114 if (scall != MAT_INITIAL_MATRIX) { 5115 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5116 aloc[0] = *A_loc; 5117 } 5118 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5119 *A_loc = aloc[0]; 5120 ierr = PetscFree(aloc);CHKERRQ(ierr); 5121 if (!row) { 5122 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5123 } 5124 if (!col) { 5125 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5126 } 5127 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5128 PetscFunctionReturn(0); 5129 } 5130 5131 #undef __FUNCT__ 5132 #define __FUNCT__ "MatGetBrowsOfAcols" 5133 /*@C 5134 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5135 5136 Collective on Mat 5137 5138 Input Parameters: 5139 + A,B - the matrices in mpiaij format 5140 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5141 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5142 5143 Output Parameter: 5144 + rowb, colb - index sets of rows and columns of B to extract 5145 - B_seq - the sequential matrix generated 5146 5147 Level: developer 5148 5149 @*/ 5150 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5151 { 5152 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5153 PetscErrorCode ierr; 5154 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5155 IS isrowb,iscolb; 5156 Mat *bseq=NULL; 5157 5158 PetscFunctionBegin; 5159 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5160 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5161 } 5162 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5163 5164 if (scall == MAT_INITIAL_MATRIX) { 5165 start = A->cmap->rstart; 5166 cmap = a->garray; 5167 nzA = a->A->cmap->n; 5168 nzB = a->B->cmap->n; 5169 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5170 ncols = 0; 5171 for (i=0; i<nzB; i++) { /* row < local row index */ 5172 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5173 else break; 5174 } 5175 imark = i; 5176 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5177 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5178 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5179 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5180 } else { 5181 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5182 isrowb = *rowb; iscolb = *colb; 5183 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5184 bseq[0] = *B_seq; 5185 } 5186 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5187 *B_seq = bseq[0]; 5188 ierr = PetscFree(bseq);CHKERRQ(ierr); 5189 if (!rowb) { 5190 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5191 } else { 5192 *rowb = isrowb; 5193 } 5194 if (!colb) { 5195 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5196 } else { 5197 *colb = iscolb; 5198 } 5199 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5200 PetscFunctionReturn(0); 5201 } 5202 5203 #undef __FUNCT__ 5204 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5205 /* 5206 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5207 of the OFF-DIAGONAL portion of local A 5208 5209 Collective on Mat 5210 5211 Input Parameters: 5212 + A,B - the matrices in mpiaij format 5213 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5214 5215 Output Parameter: 5216 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5217 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5218 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5219 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5220 5221 Level: developer 5222 5223 */ 5224 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5225 { 5226 VecScatter_MPI_General *gen_to,*gen_from; 5227 PetscErrorCode ierr; 5228 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5229 Mat_SeqAIJ *b_oth; 5230 VecScatter ctx =a->Mvctx; 5231 MPI_Comm comm; 5232 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5233 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5234 PetscScalar *rvalues,*svalues; 5235 MatScalar *b_otha,*bufa,*bufA; 5236 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5237 MPI_Request *rwaits = NULL,*swaits = NULL; 5238 MPI_Status *sstatus,rstatus; 5239 PetscMPIInt jj; 5240 PetscInt *cols,sbs,rbs; 5241 PetscScalar *vals; 5242 5243 PetscFunctionBegin; 5244 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5245 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5246 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5247 } 5248 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5249 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5250 5251 gen_to = (VecScatter_MPI_General*)ctx->todata; 5252 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5253 rvalues = gen_from->values; /* holds the length of receiving row */ 5254 svalues = gen_to->values; /* holds the length of sending row */ 5255 nrecvs = gen_from->n; 5256 nsends = gen_to->n; 5257 5258 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5259 srow = gen_to->indices; /* local row index to be sent */ 5260 sstarts = gen_to->starts; 5261 sprocs = gen_to->procs; 5262 sstatus = gen_to->sstatus; 5263 sbs = gen_to->bs; 5264 rstarts = gen_from->starts; 5265 rprocs = gen_from->procs; 5266 rbs = gen_from->bs; 5267 5268 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5269 if (scall == MAT_INITIAL_MATRIX) { 5270 /* i-array */ 5271 /*---------*/ 5272 /* post receives */ 5273 for (i=0; i<nrecvs; i++) { 5274 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5275 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5276 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5277 } 5278 5279 /* pack the outgoing message */ 5280 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5281 5282 sstartsj[0] = 0; 5283 rstartsj[0] = 0; 5284 len = 0; /* total length of j or a array to be sent */ 5285 k = 0; 5286 for (i=0; i<nsends; i++) { 5287 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5288 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5289 for (j=0; j<nrows; j++) { 5290 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5291 for (l=0; l<sbs; l++) { 5292 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5293 5294 rowlen[j*sbs+l] = ncols; 5295 5296 len += ncols; 5297 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5298 } 5299 k++; 5300 } 5301 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5302 5303 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5304 } 5305 /* recvs and 
sends of i-array are completed */ 5306 i = nrecvs; 5307 while (i--) { 5308 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5309 } 5310 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5311 5312 /* allocate buffers for sending j and a arrays */ 5313 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5314 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5315 5316 /* create i-array of B_oth */ 5317 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5318 5319 b_othi[0] = 0; 5320 len = 0; /* total length of j or a array to be received */ 5321 k = 0; 5322 for (i=0; i<nrecvs; i++) { 5323 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5324 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5325 for (j=0; j<nrows; j++) { 5326 b_othi[k+1] = b_othi[k] + rowlen[j]; 5327 len += rowlen[j]; k++; 5328 } 5329 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5330 } 5331 5332 /* allocate space for j and a arrrays of B_oth */ 5333 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5334 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5335 5336 /* j-array */ 5337 /*---------*/ 5338 /* post receives of j-array */ 5339 for (i=0; i<nrecvs; i++) { 5340 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5341 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5342 } 5343 5344 /* pack the outgoing message j-array */ 5345 k = 0; 5346 for (i=0; i<nsends; i++) { 5347 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5348 bufJ = bufj+sstartsj[i]; 5349 for (j=0; j<nrows; j++) { 5350 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5351 for (ll=0; ll<sbs; ll++) { 5352 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5353 for (l=0; l<ncols; l++) { 5354 *bufJ++ = cols[l]; 5355 } 5356 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5357 } 5358 } 5359 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5360 } 5361 5362 /* recvs and sends of j-array are completed */ 5363 i = nrecvs; 5364 while (i--) { 5365 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5366 } 5367 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5368 } else if (scall == MAT_REUSE_MATRIX) { 5369 sstartsj = *startsj_s; 5370 rstartsj = *startsj_r; 5371 bufa = *bufa_ptr; 5372 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5373 b_otha = b_oth->a; 5374 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5375 5376 /* a-array */ 5377 /*---------*/ 5378 /* post receives of a-array */ 5379 for (i=0; i<nrecvs; i++) { 5380 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5381 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5382 } 5383 5384 /* pack the outgoing message a-array */ 5385 k = 0; 5386 for (i=0; i<nsends; i++) { 5387 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5388 bufA = bufa+sstartsj[i]; 5389 for (j=0; j<nrows; j++) { 5390 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5391 for (ll=0; ll<sbs; ll++) { 5392 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5393 for (l=0; l<ncols; l++) { 5394 *bufA++ = vals[l]; 5395 } 5396 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5397 } 5398 } 5399 ierr = 
MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5400 } 5401 /* recvs and sends of a-array are completed */ 5402 i = nrecvs; 5403 while (i--) { 5404 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5405 } 5406 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5407 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5408 5409 if (scall == MAT_INITIAL_MATRIX) { 5410 /* put together the new matrix */ 5411 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5412 5413 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5414 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5415 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5416 b_oth->free_a = PETSC_TRUE; 5417 b_oth->free_ij = PETSC_TRUE; 5418 b_oth->nonew = 0; 5419 5420 ierr = PetscFree(bufj);CHKERRQ(ierr); 5421 if (!startsj_s || !bufa_ptr) { 5422 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5423 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5424 } else { 5425 *startsj_s = sstartsj; 5426 *startsj_r = rstartsj; 5427 *bufa_ptr = bufa; 5428 } 5429 } 5430 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5431 PetscFunctionReturn(0); 5432 } 5433 5434 #undef __FUNCT__ 5435 #define __FUNCT__ "MatGetCommunicationStructs" 5436 /*@C 5437 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5438 5439 Not Collective 5440 5441 Input Parameters: 5442 . A - The matrix in mpiaij format 5443 5444 Output Parameter: 5445 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5446 . colmap - A map from global column index to local index into lvec 5447 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5448 5449 Level: developer 5450 5451 @*/ 5452 #if defined(PETSC_USE_CTABLE) 5453 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5454 #else 5455 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5456 #endif 5457 { 5458 Mat_MPIAIJ *a; 5459 5460 PetscFunctionBegin; 5461 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5462 PetscValidPointer(lvec, 2); 5463 PetscValidPointer(colmap, 3); 5464 PetscValidPointer(multScatter, 4); 5465 a = (Mat_MPIAIJ*) A->data; 5466 if (lvec) *lvec = a->lvec; 5467 if (colmap) *colmap = a->colmap; 5468 if (multScatter) *multScatter = a->Mvctx; 5469 PetscFunctionReturn(0); 5470 } 5471 5472 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5473 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5474 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5475 5476 #undef __FUNCT__ 5477 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5478 /* 5479 Computes (B'*A')' since computing B*A directly is untenable 5480 5481 n p p 5482 ( ) ( ) ( ) 5483 m ( A ) * n ( B ) = m ( C ) 5484 ( ) ( ) ( ) 5485 5486 */ 5487 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5488 { 5489 PetscErrorCode ierr; 5490 Mat At,Bt,Ct; 5491 5492 PetscFunctionBegin; 5493 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5494 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5495 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5496 ierr = MatDestroy(&At);CHKERRQ(ierr); 
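/* At this point Ct = Bt*At = (A*B)^T; once Bt is freed below, Ct is transposed back into the caller's C so that C = A*B, per the (B'*A')' identity described above */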
5497 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5498 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5499 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5500 PetscFunctionReturn(0); 5501 } 5502 5503 #undef __FUNCT__ 5504 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5505 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5506 { 5507 PetscErrorCode ierr; 5508 PetscInt m=A->rmap->n,n=B->cmap->n; 5509 Mat Cmat; 5510 5511 PetscFunctionBegin; 5512 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5513 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5514 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5515 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5516 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5517 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5518 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5519 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5520 5521 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5522 5523 *C = Cmat; 5524 PetscFunctionReturn(0); 5525 } 5526 5527 /* ----------------------------------------------------------------*/ 5528 #undef __FUNCT__ 5529 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5530 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5531 { 5532 PetscErrorCode ierr; 5533 5534 PetscFunctionBegin; 5535 if (scall == MAT_INITIAL_MATRIX) { 5536 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5537 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5538 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5539 } 5540 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5541 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5542 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5543 PetscFunctionReturn(0); 5544 } 5545 5546 #if defined(PETSC_HAVE_MUMPS) 5547 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5548 #endif 5549 #if defined(PETSC_HAVE_PASTIX) 5550 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5551 #endif 5552 #if defined(PETSC_HAVE_SUPERLU_DIST) 5553 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5554 #endif 5555 #if defined(PETSC_HAVE_CLIQUE) 5556 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5557 #endif 5558 5559 /*MC 5560 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5561 5562 Options Database Keys: 5563 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5564 5565 Level: beginner 5566 5567 .seealso: MatCreateAIJ() 5568 M*/ 5569 5570 #undef __FUNCT__ 5571 #define __FUNCT__ "MatCreate_MPIAIJ" 5572 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5573 { 5574 Mat_MPIAIJ *b; 5575 PetscErrorCode ierr; 5576 PetscMPIInt size; 5577 5578 PetscFunctionBegin; 5579 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5580 5581 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5582 B->data = (void*)b; 5583 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5584 B->assembled = PETSC_FALSE; 5585 B->insertmode = NOT_SET_VALUES; 5586 b->size = size; 5587 5588 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5589 5590 /* build cache for off array entries formed */ 5591 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5592 5593 b->donotstash = PETSC_FALSE; 5594 b->colmap = 0; 5595 b->garray = 0; 5596 b->roworiented = PETSC_TRUE; 5597 5598 /* stuff used for matrix vector multiply */ 5599 b->lvec = NULL; 5600 b->Mvctx = NULL; 5601 5602 /* stuff for MatGetRow() */ 5603 b->rowindices = 0; 5604 b->rowvalues = 0; 5605 b->getrowactive = PETSC_FALSE; 5606 5607 /* flexible pointer used in CUSP/CUSPARSE classes */ 5608 b->spptr = NULL; 5609 5610 #if defined(PETSC_HAVE_MUMPS) 5611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5612 #endif 5613 #if defined(PETSC_HAVE_PASTIX) 5614 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5615 #endif 5616 #if defined(PETSC_HAVE_SUPERLU_DIST) 5617 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5618 #endif 5619 #if defined(PETSC_HAVE_CLIQUE) 5620 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5621 #endif 5622 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5623 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5624 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5625 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5626 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5627 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5628 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5629 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5630 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5631 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5632 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5633 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5634 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5635 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5636 PetscFunctionReturn(0); 5637 } 5638 5639 #undef __FUNCT__ 5640 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5641 /*@ 5642 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5643 and "off-diagonal" part of the matrix in CSR format. 5644 5645 Collective on MPI_Comm 5646 5647 Input Parameters: 5648 + comm - MPI communicator 5649 . m - number of local rows (Cannot be PETSC_DECIDE) 5650 . n - This value should be the same as the local size used in creating the 5651 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5652 calculated if N is given) For square matrices n is almost always m. 5653 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5654 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5655 . i - row indices for "diagonal" portion of matrix 5656 . j - column indices 5657 . a - matrix values 5658 . oi - row indices for "off-diagonal" portion of matrix 5659 . oj - column indices 5660 - oa - matrix values 5661 5662 Output Parameter: 5663 . mat - the matrix 5664 5665 Level: advanced 5666 5667 Notes: 5668 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5669 must free the arrays once the matrix has been destroyed and not before. 5670 5671 The i and j indices are 0 based 5672 5673 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5674 5675 This sets local rows and cannot be used to set off-processor values. 5676 5677 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5678 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5679 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5680 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5681 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5682 communication if it is known that only local entries will be set. 
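 Example:
 For readers who do need the split format, the following is a minimal illustrative sketch (the sizes and
 values are hypothetical and only show the argument layout). It builds the 2x2 matrix [2 -1; -1 2] on a
 two-process communicator with one local row and one local column per process; the arrays shown are those
 rank 0 would pass, and rank 1 passes the analogous arrays for its row. Here i/j/a describe the single
 diagonal-block entry (value 2.0 in local column 0 of that block) and oi/oj/oa the single off-diagonal
 entry (value -1.0, with oj holding the global column number 1); this indexing is an assumption of the
 sketch, see MatCreateAIJ() for the precise definition of the two blocks. Remember that the arrays are
 not copied, so they must remain valid until the matrix is destroyed.

       PetscInt    i[]  = {0,1},  j[]  = {0};
       PetscScalar a[]  = {2.0};
       PetscInt    oi[] = {0,1},  oj[] = {1};
       PetscScalar oa[] = {-1.0};
       Mat         A;

       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);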
5683 5684 .keywords: matrix, aij, compressed row, sparse, parallel 5685 5686 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5687 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5688 @*/ 5689 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5690 { 5691 PetscErrorCode ierr; 5692 Mat_MPIAIJ *maij; 5693 5694 PetscFunctionBegin; 5695 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5696 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5697 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5698 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5699 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5700 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5701 maij = (Mat_MPIAIJ*) (*mat)->data; 5702 5703 (*mat)->preallocated = PETSC_TRUE; 5704 5705 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5706 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5707 5708 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5709 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5710 5711 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5712 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5713 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5714 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5715 5716 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5717 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5718 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5719 PetscFunctionReturn(0); 5720 } 5721 5722 /* 5723 Special version for direct calls from Fortran 5724 */ 5725 #include <petsc-private/fortranimpl.h> 5726 5727 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5728 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5729 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5730 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5731 #endif 5732 5733 /* Change these macros so can be used in void function */ 5734 #undef CHKERRQ 5735 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5736 #undef SETERRQ2 5737 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5738 #undef SETERRQ3 5739 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5740 #undef SETERRQ 5741 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5742 5743 #undef __FUNCT__ 5744 #define __FUNCT__ "matsetvaluesmpiaij_" 5745 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5746 { 5747 Mat mat = *mmat; 5748 PetscInt m = *mm, n = *mn; 5749 InsertMode addv = *maddv; 5750 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5751 PetscScalar value; 5752 PetscErrorCode ierr; 5753 5754 MatCheckPreallocated(mat,1); 5755 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5756 5757 #if defined(PETSC_USE_DEBUG) 5758 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5759 #endif 5760 { 5761 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5762 
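/* [rstart,rend) and [cstart,cend) are this process's row and column ownership ranges: rows outside [rstart,rend) are stashed for communication at assembly time (unless donotstash is set), and columns inside [cstart,cend) go into the diagonal block A while the rest go into the off-diagonal block B */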
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5763 PetscBool roworiented = aij->roworiented; 5764 5765 /* Some Variables required in the macro */ 5766 Mat A = aij->A; 5767 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5768 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5769 MatScalar *aa = a->a; 5770 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5771 Mat B = aij->B; 5772 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5773 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5774 MatScalar *ba = b->a; 5775 5776 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5777 PetscInt nonew = a->nonew; 5778 MatScalar *ap1,*ap2; 5779 5780 PetscFunctionBegin; 5781 for (i=0; i<m; i++) { 5782 if (im[i] < 0) continue; 5783 #if defined(PETSC_USE_DEBUG) 5784 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5785 #endif 5786 if (im[i] >= rstart && im[i] < rend) { 5787 row = im[i] - rstart; 5788 lastcol1 = -1; 5789 rp1 = aj + ai[row]; 5790 ap1 = aa + ai[row]; 5791 rmax1 = aimax[row]; 5792 nrow1 = ailen[row]; 5793 low1 = 0; 5794 high1 = nrow1; 5795 lastcol2 = -1; 5796 rp2 = bj + bi[row]; 5797 ap2 = ba + bi[row]; 5798 rmax2 = bimax[row]; 5799 nrow2 = bilen[row]; 5800 low2 = 0; 5801 high2 = nrow2; 5802 5803 for (j=0; j<n; j++) { 5804 if (roworiented) value = v[i*n+j]; 5805 else value = v[i+j*m]; 5806 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5807 if (in[j] >= cstart && in[j] < cend) { 5808 col = in[j] - cstart; 5809 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5810 } else if (in[j] < 0) continue; 5811 #if defined(PETSC_USE_DEBUG) 5812 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5813 #endif 5814 else { 5815 if (mat->was_assembled) { 5816 if (!aij->colmap) { 5817 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5818 } 5819 #if defined(PETSC_USE_CTABLE) 5820 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5821 col--; 5822 #else 5823 col = aij->colmap[in[j]] - 1; 5824 #endif 5825 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5826 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5827 col = in[j]; 5828 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5829 B = aij->B; 5830 b = (Mat_SeqAIJ*)B->data; 5831 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5832 rp2 = bj + bi[row]; 5833 ap2 = ba + bi[row]; 5834 rmax2 = bimax[row]; 5835 nrow2 = bilen[row]; 5836 low2 = 0; 5837 high2 = nrow2; 5838 bm = aij->B->rmap->n; 5839 ba = b->a; 5840 } 5841 } else col = in[j]; 5842 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5843 } 5844 } 5845 } else if (!aij->donotstash) { 5846 if (roworiented) { 5847 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5848 } else { 5849 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5850 } 5851 } 5852 } 5853 } 5854 PetscFunctionReturnVoid(); 5855 } 5856 5857
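/*
   Illustrative usage sketch (not part of the routines implemented in this file): the typical way an
   application builds a parallel AIJ matrix with the interfaces above is to create it with MatCreateAIJ(),
   preallocate, insert entries with MatSetValues() using global indices, and then assemble. The sizes and
   preallocation numbers below are hypothetical and only show the calling pattern.

      Mat         A;
      PetscInt    rstart,rend,row,ncols,cols[3];
      PetscScalar vals[3];

      ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,3,NULL,2,NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ... fill ncols, cols[], vals[] for this row ...
        ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/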