1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 
PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 138 if (type == NORM_2) { 139 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 140 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 141 } 142 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 143 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 144 } 145 } else if (type == NORM_1) { 146 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 147 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 148 } 149 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 150 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 151 } 152 } else if (type == NORM_INFINITY) { 153 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 154 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 155 } 156 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 157 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 158 } 159 160 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 161 if (type == 
NORM_INFINITY) { 162 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 163 } else { 164 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 165 } 166 ierr = PetscFree(work);CHKERRQ(ierr); 167 if (type == NORM_2) { 168 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatDistribute_MPIAIJ" 175 /* 176 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 177 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 198 if (!rank) { 199 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 200 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 201 } 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 204 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 205 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 206 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 207 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 208 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 209 ierr = 
PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 210 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 211 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 212 213 rowners[0] = 0; 214 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 215 rstart = rowners[rank]; 216 rend = rowners[rank+1]; 217 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 218 if (!rank) { 219 gmata = (Mat_SeqAIJ*) gmat->data; 220 /* send row lengths to all processors */ 221 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 222 for (i=1; i<size; i++) { 223 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 224 } 225 /* determine number diagonal and off-diagonal counts */ 226 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 227 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 228 jj = 0; 229 for (i=0; i<m; i++) { 230 for (j=0; j<dlens[i]; j++) { 231 if (gmata->j[jj] < rstart) ld[i]++; 232 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 233 jj++; 234 } 235 } 236 /* send column indices to other processes */ 237 for (i=1; i<size; i++) { 238 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 239 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 240 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 241 } 242 243 /* send numerical values to other processes */ 244 for (i=1; i<size; i++) { 245 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 246 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 247 } 248 gmataa = gmata->a; 249 gmataj = gmata->j; 250 251 } else { 252 /* receive row lengths */ 253 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 254 /* receive column indices */ 255 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 257 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 258 /* 
determine number diagonal and off-diagonal counts */ 259 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 260 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 261 jj = 0; 262 for (i=0; i<m; i++) { 263 for (j=0; j<dlens[i]; j++) { 264 if (gmataj[jj] < rstart) ld[i]++; 265 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 266 jj++; 267 } 268 } 269 /* receive numerical values */ 270 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 271 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 272 } 273 /* set preallocation */ 274 for (i=0; i<m; i++) { 275 dlens[i] -= olens[i]; 276 } 277 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 278 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 279 280 for (i=0; i<m; i++) { 281 dlens[i] += olens[i]; 282 } 283 cnt = 0; 284 for (i=0; i<m; i++) { 285 row = rstart + i; 286 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 287 cnt += dlens[i]; 288 } 289 if (rank) { 290 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 291 } 292 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 293 ierr = PetscFree(rowners);CHKERRQ(ierr); 294 295 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 296 297 *inmat = mat; 298 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 299 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 300 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 301 mat = *inmat; 302 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 303 if (!rank) { 304 /* send numerical values to other processes */ 305 gmata = (Mat_SeqAIJ*) gmat->data; 306 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 307 gmataa = gmata->a; 308 for (i=1; i<size; i++) { 309 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 310 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 311 } 312 nz = 
gmata->i[rowners[1]]-gmata->i[rowners[0]]; 313 } else { 314 /* receive numerical values from process 0*/ 315 nz = Ad->nz + Ao->nz; 316 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 317 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 318 } 319 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 320 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 321 ad = Ad->a; 322 ao = Ao->a; 323 if (mat->rmap->n) { 324 i = 0; 325 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 326 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 327 } 328 for (i=1; i<mat->rmap->n; i++) { 329 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 330 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 331 } 332 i--; 333 if (mat->rmap->n) { 334 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 335 } 336 if (rank) { 337 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 338 } 339 } 340 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 341 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 345 /* 346 Local utility routine that creates a mapping from the global column 347 number to the local number in the off-diagonal part of the local 348 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 349 a slightly higher hash table cost; without it it is not scalable (each processor 350 has an order N integer array but is fast to acess. 
351 */ 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 355 { 356 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 357 PetscErrorCode ierr; 358 PetscInt n = aij->B->cmap->n,i; 359 360 PetscFunctionBegin; 361 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 362 #if defined(PETSC_USE_CTABLE) 363 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 364 for (i=0; i<n; i++) { 365 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 366 } 367 #else 368 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 369 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 370 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 371 #endif 372 PetscFunctionReturn(0); 373 } 374 375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 376 { \ 377 if (col <= lastcol1) low1 = 0; \ 378 else high1 = nrow1; \ 379 lastcol1 = col;\ 380 while (high1-low1 > 5) { \ 381 t = (low1+high1)/2; \ 382 if (rp1[t] > col) high1 = t; \ 383 else low1 = t; \ 384 } \ 385 for (_i=low1; _i<high1; _i++) { \ 386 if (rp1[_i] > col) break; \ 387 if (rp1[_i] == col) { \ 388 if (addv == ADD_VALUES) ap1[_i] += value; \ 389 else ap1[_i] = value; \ 390 goto a_noinsert; \ 391 } \ 392 } \ 393 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 394 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 395 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 396 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 397 N = nrow1++ - 1; a->nz++; high1++; \ 398 /* shift up all the later entries in this row */ \ 399 for (ii=N; ii>=_i; ii--) { \ 400 rp1[ii+1] = rp1[ii]; \ 401 ap1[ii+1] = ap1[ii]; \ 402 } \ 403 rp1[_i] = col; \ 
404 ap1[_i] = value; \ 405 A->nonzerostate++;\ 406 a_noinsert: ; \ 407 ailen[row] = nrow1; \ 408 } 409 410 411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 412 { \ 413 if (col <= lastcol2) low2 = 0; \ 414 else high2 = nrow2; \ 415 lastcol2 = col; \ 416 while (high2-low2 > 5) { \ 417 t = (low2+high2)/2; \ 418 if (rp2[t] > col) high2 = t; \ 419 else low2 = t; \ 420 } \ 421 for (_i=low2; _i<high2; _i++) { \ 422 if (rp2[_i] > col) break; \ 423 if (rp2[_i] == col) { \ 424 if (addv == ADD_VALUES) ap2[_i] += value; \ 425 else ap2[_i] = value; \ 426 goto b_noinsert; \ 427 } \ 428 } \ 429 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 430 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 431 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 432 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 433 N = nrow2++ - 1; b->nz++; high2++; \ 434 /* shift up all the later entries in this row */ \ 435 for (ii=N; ii>=_i; ii--) { \ 436 rp2[ii+1] = rp2[ii]; \ 437 ap2[ii+1] = ap2[ii]; \ 438 } \ 439 rp2[_i] = col; \ 440 ap2[_i] = value; \ 441 B->nonzerostate++; \ 442 b_noinsert: ; \ 443 bilen[row] = nrow2; \ 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 449 { 450 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 452 PetscErrorCode ierr; 453 PetscInt l,*garray = mat->garray,diag; 454 455 PetscFunctionBegin; 456 /* code only works for square matrices A */ 457 458 /* find size of row to the left of the diagonal part */ 459 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 460 row = row - diag; 461 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 462 if (garray[b->j[b->i[row]+l]] > diag) break; 463 } 464 ierr = 
PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 465 466 /* diagonal part */ 467 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 468 469 /* right of diagonal part */ 470 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatSetValues_MPIAIJ" 476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 477 { 478 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 479 PetscScalar value; 480 PetscErrorCode ierr; 481 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 482 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 483 PetscBool roworiented = aij->roworiented; 484 485 /* Some Variables required in the macro */ 486 Mat A = aij->A; 487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 488 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 489 MatScalar *aa = a->a; 490 PetscBool ignorezeroentries = a->ignorezeroentries; 491 Mat B = aij->B; 492 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 493 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 494 MatScalar *ba = b->a; 495 496 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 497 PetscInt nonew; 498 MatScalar *ap1,*ap2; 499 500 PetscFunctionBegin; 501 for (i=0; i<m; i++) { 502 if (im[i] < 0) continue; 503 #if defined(PETSC_USE_DEBUG) 504 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 505 #endif 506 if (im[i] >= rstart && im[i] < rend) { 507 row = im[i] - rstart; 508 lastcol1 = -1; 509 rp1 = aj + ai[row]; 510 ap1 = aa + ai[row]; 511 rmax1 = aimax[row]; 512 nrow1 = ailen[row]; 513 low1 = 0; 514 high1 = nrow1; 515 
lastcol2 = -1; 516 rp2 = bj + bi[row]; 517 ap2 = ba + bi[row]; 518 rmax2 = bimax[row]; 519 nrow2 = bilen[row]; 520 low2 = 0; 521 high2 = nrow2; 522 523 for (j=0; j<n; j++) { 524 if (roworiented) value = v[i*n+j]; 525 else value = v[i+j*m]; 526 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 527 if (in[j] >= cstart && in[j] < cend) { 528 col = in[j] - cstart; 529 nonew = a->nonew; 530 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 531 } else if (in[j] < 0) continue; 532 #if defined(PETSC_USE_DEBUG) 533 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 534 #endif 535 else { 536 if (mat->was_assembled) { 537 if (!aij->colmap) { 538 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 539 } 540 #if defined(PETSC_USE_CTABLE) 541 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 542 col--; 543 #else 544 col = aij->colmap[in[j]] - 1; 545 #endif 546 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 547 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 548 col = in[j]; 549 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 550 B = aij->B; 551 b = (Mat_SeqAIJ*)B->data; 552 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 553 rp2 = bj + bi[row]; 554 ap2 = ba + bi[row]; 555 rmax2 = bimax[row]; 556 nrow2 = bilen[row]; 557 low2 = 0; 558 high2 = nrow2; 559 bm = aij->B->rmap->n; 560 ba = b->a; 561 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 562 } else col = in[j]; 563 nonew = b->nonew; 564 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 565 } 566 } 567 } else { 568 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 569 if (!aij->donotstash) { 570 mat->assembled = PETSC_FALSE; 571 if 
(roworiented) { 572 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 573 } else { 574 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 575 } 576 } 577 } 578 } 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatGetValues_MPIAIJ" 584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 585 { 586 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 587 PetscErrorCode ierr; 588 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 589 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 590 591 PetscFunctionBegin; 592 for (i=0; i<m; i++) { 593 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 594 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 595 if (idxm[i] >= rstart && idxm[i] < rend) { 596 row = idxm[i] - rstart; 597 for (j=0; j<n; j++) { 598 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 599 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 600 if (idxn[j] >= cstart && idxn[j] < cend) { 601 col = idxn[j] - cstart; 602 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 603 } else { 604 if (!aij->colmap) { 605 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 606 } 607 #if defined(PETSC_USE_CTABLE) 608 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 609 col--; 610 #else 611 col = aij->colmap[idxn[j]] - 1; 612 #endif 613 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 614 else { 615 ierr = 
MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 616 } 617 } 618 } 619 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 620 } 621 PetscFunctionReturn(0); 622 } 623 624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt nstash,reallocs; 633 InsertMode addv; 634 635 PetscFunctionBegin; 636 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 637 638 /* make sure all processors are either in INSERTMODE or ADDMODE */ 639 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 640 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 641 mat->insertmode = addv; /* in case this processor had no cache */ 642 643 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 644 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 645 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 646 PetscFunctionReturn(0); 647 } 648 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 651 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 652 { 653 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 654 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 655 PetscErrorCode ierr; 656 PetscMPIInt n; 657 PetscInt i,j,rstart,ncols,flg; 658 PetscInt *row,*col; 659 PetscBool other_disassembled; 660 PetscScalar *val; 661 InsertMode addv = mat->insertmode; 662 663 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 664 665 PetscFunctionBegin; 666 if (!aij->donotstash && 
!mat->nooffprocentries) { 667 while (1) { 668 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 669 if (!flg) break; 670 671 for (i=0; i<n; ) { 672 /* Now identify the consecutive vals belonging to the same row */ 673 for (j=i,rstart=row[j]; j<n; j++) { 674 if (row[j] != rstart) break; 675 } 676 if (j < n) ncols = j-i; 677 else ncols = n-i; 678 /* Now assemble all these values with a single function call */ 679 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 680 681 i = j; 682 } 683 } 684 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 685 } 686 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 687 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 688 689 /* determine if any processor has disassembled, if so we must 690 also disassemble ourselfs, in order that we may reassemble. */ 691 /* 692 if nonzero structure of submatrix B cannot change then we know that 693 no processor disassembled thus we can skip this stuff 694 */ 695 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 696 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 697 if (mat->was_assembled && !other_disassembled) { 698 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 699 } 700 } 701 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 702 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 703 } 704 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 705 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 706 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 707 708 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 709 710 aij->rowvalues = 0; 711 712 /* used by MatAXPY() */ 713 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 714 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 715 716 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 717 if (a->inode.size) 
mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 718 719 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 720 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 721 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 722 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 723 } 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 729 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 730 { 731 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 732 PetscErrorCode ierr; 733 734 PetscFunctionBegin; 735 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 736 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatZeroRows_MPIAIJ" 742 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 743 { 744 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 745 PetscInt *owners = A->rmap->range; 746 PetscInt n = A->rmap->n; 747 PetscSF sf; 748 PetscInt *lrows; 749 PetscSFNode *rrows; 750 PetscInt r, p = 0, len = 0; 751 PetscErrorCode ierr; 752 753 PetscFunctionBegin; 754 /* Create SF where leaves are input rows and roots are owned rows */ 755 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 756 for (r = 0; r < n; ++r) lrows[r] = -1; 757 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 758 for (r = 0; r < N; ++r) { 759 const PetscInt idx = rows[r]; 760 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 761 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 762 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 763 } 764 if (A->nooffproczerorows) { 765 if (p != mat->rank) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank); 766 lrows[len++] = idx - owners[p]; 767 } else { 768 rrows[r].rank = p; 769 rrows[r].index = rows[r] - owners[p]; 770 } 771 } 772 if (!A->nooffproczerorows) { 773 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 774 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 775 /* Collect flags for rows to be zeroed */ 776 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 777 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 778 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 779 /* Compress and put in row numbers */ 780 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 781 } 782 /* fix right hand side if needed */ 783 if (x && b) { 784 const PetscScalar *xx; 785 PetscScalar *bb; 786 787 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 788 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 789 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 790 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 791 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 792 } 793 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 794 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 795 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 796 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 797 } else if (diag != 0.0) { 798 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 799 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 800 for (r = 0; r < len; ++r) { 801 const PetscInt row = lrows[r] + A->rmap->rstart; 802 ierr 
= MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 803 } 804 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 805 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 806 } else { 807 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 808 } 809 ierr = PetscFree(lrows);CHKERRQ(ierr); 810 811 /* only change matrix nonzero state if pattern was allowed to be changed */ 812 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 813 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 814 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 815 } 816 PetscFunctionReturn(0); 817 } 818 819 #undef __FUNCT__ 820 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 821 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 822 { 823 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 824 PetscErrorCode ierr; 825 PetscMPIInt n = A->rmap->n; 826 PetscInt i,j,r,m,p = 0,len = 0; 827 PetscInt *lrows,*owners = A->rmap->range; 828 PetscSFNode *rrows; 829 PetscSF sf; 830 const PetscScalar *xx; 831 PetscScalar *bb,*mask; 832 Vec xmask,lmask; 833 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 834 const PetscInt *aj, *ii,*ridx; 835 PetscScalar *aa; 836 837 PetscFunctionBegin; 838 /* Create SF where leaves are input rows and roots are owned rows */ 839 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 840 for (r = 0; r < n; ++r) lrows[r] = -1; 841 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 842 for (r = 0; r < N; ++r) { 843 const PetscInt idx = rows[r]; 844 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 845 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 846 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 847 } 848 rrows[r].rank = p; 849 rrows[r].index = rows[r] - 
owners[p]; 850 } 851 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 852 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 853 /* Collect flags for rows to be zeroed */ 854 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 855 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 856 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 857 /* Compress and put in row numbers */ 858 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 859 /* zero diagonal part of matrix */ 860 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 861 /* handle off diagonal part of matrix */ 862 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 863 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 864 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 865 for (i=0; i<len; i++) bb[lrows[i]] = 1; 866 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 867 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 870 if (x) { 871 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 873 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 874 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 875 } 876 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 877 /* remove zeroed rows of off diagonal matrix */ 878 ii = aij->i; 879 for (i=0; i<len; i++) { 880 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 881 } 882 /* loop over all elements of off process part of matrix zeroing removed columns*/ 883 if (aij->compressedrow.use) { 884 m = aij->compressedrow.nrows; 885 ii = aij->compressedrow.i; 886 ridx = 
aij->compressedrow.rindex; 887 for (i=0; i<m; i++) { 888 n = ii[i+1] - ii[i]; 889 aj = aij->j + ii[i]; 890 aa = aij->a + ii[i]; 891 892 for (j=0; j<n; j++) { 893 if (PetscAbsScalar(mask[*aj])) { 894 if (b) bb[*ridx] -= *aa*xx[*aj]; 895 *aa = 0.0; 896 } 897 aa++; 898 aj++; 899 } 900 ridx++; 901 } 902 } else { /* do not use compressed row format */ 903 m = l->B->rmap->n; 904 for (i=0; i<m; i++) { 905 n = ii[i+1] - ii[i]; 906 aj = aij->j + ii[i]; 907 aa = aij->a + ii[i]; 908 for (j=0; j<n; j++) { 909 if (PetscAbsScalar(mask[*aj])) { 910 if (b) bb[i] -= *aa*xx[*aj]; 911 *aa = 0.0; 912 } 913 aa++; 914 aj++; 915 } 916 } 917 } 918 if (x) { 919 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 920 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 921 } 922 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 923 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 924 ierr = PetscFree(lrows);CHKERRQ(ierr); 925 926 /* only change matrix nonzero state if pattern was allowed to be changed */ 927 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 928 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 929 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 930 } 931 PetscFunctionReturn(0); 932 } 933 934 #undef __FUNCT__ 935 #define __FUNCT__ "MatMult_MPIAIJ" 936 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 937 { 938 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 939 PetscErrorCode ierr; 940 PetscInt nt; 941 942 PetscFunctionBegin; 943 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 944 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 945 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 946 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 947 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 948 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 
949 PetscFunctionReturn(0); 950 } 951 952 #undef __FUNCT__ 953 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 954 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 955 { 956 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 957 PetscErrorCode ierr; 958 959 PetscFunctionBegin; 960 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 961 PetscFunctionReturn(0); 962 } 963 964 #undef __FUNCT__ 965 #define __FUNCT__ "MatMultAdd_MPIAIJ" 966 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 967 { 968 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 969 PetscErrorCode ierr; 970 971 PetscFunctionBegin; 972 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 973 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 974 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 975 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 976 PetscFunctionReturn(0); 977 } 978 979 #undef __FUNCT__ 980 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 981 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 982 { 983 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 984 PetscErrorCode ierr; 985 PetscBool merged; 986 987 PetscFunctionBegin; 988 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 989 /* do nondiagonal part */ 990 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 991 if (!merged) { 992 /* send it on its way */ 993 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 994 /* do local part */ 995 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 996 /* receive remote parts: note this assumes the values are not actually */ 997 /* added in yy until the next line, */ 998 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 999 } else { 1000 /* do local part */ 1001 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1002 /* send it on its way */ 1003 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1004 /* values actually were received in the Begin() but we need to call this nop */ 1005 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1006 } 1007 PetscFunctionReturn(0); 1008 } 1009 1010 #undef __FUNCT__ 1011 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1012 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1013 { 1014 MPI_Comm comm; 1015 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1016 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1017 IS Me,Notme; 1018 PetscErrorCode ierr; 1019 PetscInt M,N,first,last,*notme,i; 1020 PetscMPIInt size; 1021 1022 PetscFunctionBegin; 1023 /* Easy test: symmetric diagonal block */ 1024 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1025 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1026 if (!*f) PetscFunctionReturn(0); 1027 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1028 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1029 if (size == 1) PetscFunctionReturn(0); 1030 1031 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
*/ 1032 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1033 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1034 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1035 for (i=0; i<first; i++) notme[i] = i; 1036 for (i=last; i<M; i++) notme[i-last+first] = i; 1037 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1038 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1039 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1040 Aoff = Aoffs[0]; 1041 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1042 Boff = Boffs[0]; 1043 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1044 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1045 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1046 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1047 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1048 ierr = PetscFree(notme);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 #undef __FUNCT__ 1053 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1054 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1055 { 1056 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1057 PetscErrorCode ierr; 1058 1059 PetscFunctionBegin; 1060 /* do nondiagonal part */ 1061 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1062 /* send it on its way */ 1063 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1064 /* do local part */ 1065 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1066 /* receive remote parts */ 1067 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1068 PetscFunctionReturn(0); 1069 } 1070 1071 /* 1072 This only works correctly for square matrices where the subblock A->A is the 1073 diagonal block 1074 */ 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1077 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat 
A,Vec v) 1078 { 1079 PetscErrorCode ierr; 1080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1081 1082 PetscFunctionBegin; 1083 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1084 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1085 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatScale_MPIAIJ" 1091 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1092 { 1093 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1094 PetscErrorCode ierr; 1095 1096 PetscFunctionBegin; 1097 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1098 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatDestroy_Redundant" 1104 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant) 1105 { 1106 PetscErrorCode ierr; 1107 Mat_Redundant *redund = *redundant; 1108 PetscInt i; 1109 1110 PetscFunctionBegin; 1111 *redundant = NULL; 1112 if (redund){ 1113 if (redund->matseq) { /* via MatGetSubMatrices() */ 1114 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 1115 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 1116 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 1117 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 1118 } else { 1119 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 1120 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1121 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1122 for (i=0; i<redund->nrecvs; i++) { 1123 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1124 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1125 } 1126 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1127 } 1128 1129 if (redund->psubcomm) { 1130 ierr = 
PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 1131 } 1132 ierr = PetscFree(redund);CHKERRQ(ierr); 1133 } 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNCT__ 1138 #define __FUNCT__ "MatDestroy_MPIAIJ" 1139 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1140 { 1141 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1142 PetscErrorCode ierr; 1143 1144 PetscFunctionBegin; 1145 #if defined(PETSC_USE_LOG) 1146 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1147 #endif 1148 ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr); 1149 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1150 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1151 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1152 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1153 #if defined(PETSC_USE_CTABLE) 1154 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1155 #else 1156 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1157 #endif 1158 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1159 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1160 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1161 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1162 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1163 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1164 1165 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1168 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1169 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1170 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1172 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1179 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1180 { 1181 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1182 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1183 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1184 PetscErrorCode ierr; 1185 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1186 int fd; 1187 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1188 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1189 PetscScalar *column_values; 1190 PetscInt message_count,flowcontrolcount; 1191 FILE *file; 1192 1193 PetscFunctionBegin; 1194 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1195 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1196 nz = A->nz + B->nz; 1197 if (!rank) { 1198 header[0] = MAT_FILE_CLASSID; 1199 header[1] = mat->rmap->N; 1200 header[2] = mat->cmap->N; 1201 1202 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1203 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1204 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1205 /* get largest number of rows any processor has */ 1206 rlen = mat->rmap->n; 1207 range = mat->rmap->range; 1208 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1209 } else { 1210 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1211 rlen = mat->rmap->n; 1212 } 1213 1214 /* load up the local row counts */ 1215 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1216 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + 
B->i[i+1] - B->i[i]; 1217 1218 /* store the row lengths to the file */ 1219 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1220 if (!rank) { 1221 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1222 for (i=1; i<size; i++) { 1223 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1224 rlen = range[i+1] - range[i]; 1225 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1226 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1227 } 1228 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1229 } else { 1230 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1231 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1232 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1233 } 1234 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1235 1236 /* load up the local column indices */ 1237 nzmax = nz; /* th processor needs space a largest processor needs */ 1238 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1239 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1240 cnt = 0; 1241 for (i=0; i<mat->rmap->n; i++) { 1242 for (j=B->i[i]; j<B->i[i+1]; j++) { 1243 if ((col = garray[B->j[j]]) > cstart) break; 1244 column_indices[cnt++] = col; 1245 } 1246 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1247 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1248 } 1249 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1250 1251 /* store the column indices to the file */ 1252 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1253 if (!rank) { 1254 MPI_Status status; 1255 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1256 for (i=1; i<size; i++) { 1257 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1258 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1259 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1260 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1261 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1262 } 1263 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1264 } else { 1265 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1266 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1267 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1268 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1269 } 1270 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1271 1272 /* load up the local column values */ 1273 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1274 cnt = 0; 1275 for (i=0; i<mat->rmap->n; i++) { 1276 for (j=B->i[i]; j<B->i[i+1]; j++) { 1277 if (garray[B->j[j]] > cstart) break; 1278 column_values[cnt++] = B->a[j]; 1279 } 1280 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1281 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1282 } 1283 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1284 1285 /* store the column values to the file */ 1286 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1287 if (!rank) { 1288 MPI_Status status; 1289 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1290 for (i=1; i<size; i++) { 1291 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1292 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1293 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1294 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1295 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1296 } 1297 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1298 } else { 1299 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1300 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1301 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1302 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1303 } 1304 ierr = PetscFree(column_values);CHKERRQ(ierr); 1305 1306 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1307 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #include <petscdraw.h> 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1314 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1315 { 1316 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1317 PetscErrorCode ierr; 1318 PetscMPIInt rank = aij->rank,size = aij->size; 1319 PetscBool isdraw,iascii,isbinary; 1320 PetscViewer sviewer; 1321 PetscViewerFormat format; 1322 1323 PetscFunctionBegin; 1324 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1325 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1326 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1327 if (iascii) { 1328 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1329 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1330 MatInfo info; 1331 PetscBool inodes; 1332 1333 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1334 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1335 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1336 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1337 if (!inodes) { 1338 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1339 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1340 } else { 1341 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1342 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1343 } 1344 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1345 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1346 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1347 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1348 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1349 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1350 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1351 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 
1352 PetscFunctionReturn(0); 1353 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1354 PetscInt inodecount,inodelimit,*inodes; 1355 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1356 if (inodes) { 1357 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1358 } else { 1359 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1360 } 1361 PetscFunctionReturn(0); 1362 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1363 PetscFunctionReturn(0); 1364 } 1365 } else if (isbinary) { 1366 if (size == 1) { 1367 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1368 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1369 } else { 1370 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1371 } 1372 PetscFunctionReturn(0); 1373 } else if (isdraw) { 1374 PetscDraw draw; 1375 PetscBool isnull; 1376 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1377 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1378 } 1379 1380 { 1381 /* assemble the entire matrix onto first processor. 
*/ 1382 Mat A; 1383 Mat_SeqAIJ *Aloc; 1384 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1385 MatScalar *a; 1386 1387 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1388 if (!rank) { 1389 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1390 } else { 1391 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1392 } 1393 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1394 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1395 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1396 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1397 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1398 1399 /* copy over the A part */ 1400 Aloc = (Mat_SeqAIJ*)aij->A->data; 1401 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1402 row = mat->rmap->rstart; 1403 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1404 for (i=0; i<m; i++) { 1405 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1406 row++; 1407 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1408 } 1409 aj = Aloc->j; 1410 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1411 1412 /* copy over the B part */ 1413 Aloc = (Mat_SeqAIJ*)aij->B->data; 1414 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1415 row = mat->rmap->rstart; 1416 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1417 ct = cols; 1418 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1419 for (i=0; i<m; i++) { 1420 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1421 row++; 1422 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1423 } 1424 ierr = PetscFree(ct);CHKERRQ(ierr); 1425 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1426 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1427 /* 1428 Everyone has to call to draw the matrix since the graphics waits are 1429 synchronized across all processors that share 
the PetscDraw object 1430 */ 1431 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1432 if (!rank) { 1433 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1434 } 1435 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1436 ierr = MatDestroy(&A);CHKERRQ(ierr); 1437 } 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatView_MPIAIJ" 1443 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1444 { 1445 PetscErrorCode ierr; 1446 PetscBool iascii,isdraw,issocket,isbinary; 1447 1448 PetscFunctionBegin; 1449 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1450 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1451 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1452 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1453 if (iascii || isdraw || isbinary || issocket) { 1454 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatSOR_MPIAIJ" 1461 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1462 { 1463 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1464 PetscErrorCode ierr; 1465 Vec bb1 = 0; 1466 PetscBool hasop; 1467 1468 PetscFunctionBegin; 1469 if (flag == SOR_APPLY_UPPER) { 1470 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1471 PetscFunctionReturn(0); 1472 } 1473 1474 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1475 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1476 } 1477 1478 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1479 if (flag & SOR_ZERO_INITIAL_GUESS) { 1480 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1481 its--; 1482 } 1483 1484 while (its--) { 1485 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1486 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1487 1488 /* update rhs: bb1 = bb - B*x */ 1489 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1490 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1491 1492 /* local sweep */ 1493 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1494 } 1495 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1496 if (flag & SOR_ZERO_INITIAL_GUESS) { 1497 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1498 its--; 1499 } 1500 while (its--) { 1501 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1502 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1503 1504 /* update rhs: bb1 = bb - B*x */ 1505 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1506 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1507 1508 /* local sweep */ 1509 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1510 } 1511 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1512 if (flag & SOR_ZERO_INITIAL_GUESS) { 1513 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1514 its--; 1515 } 1516 while (its--) { 1517 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1518 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1519 1520 /* update rhs: bb1 = bb - B*x */ 1521 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1522 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1523 1524 /* local sweep */ 1525 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1526 } 1527 } else if (flag & SOR_EISENSTAT) { 1528 Vec xx1; 1529 1530 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1531 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1532 1533 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1534 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1535 if (!mat->diag) { 1536 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1537 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1538 } 1539 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1540 if (hasop) { 1541 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1542 } else { 1543 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1544 } 1545 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1546 1547 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1548 1549 /* local sweep */ 1550 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1551 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1552 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1553 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1554 1555 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1556 PetscFunctionReturn(0); 1557 } 1558 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatPermute_MPIAIJ" 1561 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1562 { 1563 Mat aA,aB,Aperm; 1564 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1565 PetscScalar *aa,*ba; 1566 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1567 PetscSF rowsf,sf; 1568 IS parcolp = NULL; 1569 PetscBool done; 1570 PetscErrorCode ierr; 1571 1572 PetscFunctionBegin; 1573 ierr = 
MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1574 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1575 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1576 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1577 1578 /* Invert row permutation to find out where my rows should go */ 1579 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1580 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1581 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1582 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1583 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1584 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1585 1586 /* Invert column permutation to find out where my columns should go */ 1587 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1588 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1589 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1590 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1591 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1592 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1593 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1594 1595 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1596 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1597 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1598 1599 /* Find out where my gcols should go */ 1600 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1601 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1602 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1603 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1604 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1605 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1606 ierr = 
PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1607 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1608 1609 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1610 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1611 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1612 for (i=0; i<m; i++) { 1613 PetscInt row = rdest[i],rowner; 1614 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1615 for (j=ai[i]; j<ai[i+1]; j++) { 1616 PetscInt cowner,col = cdest[aj[j]]; 1617 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1618 if (rowner == cowner) dnnz[i]++; 1619 else onnz[i]++; 1620 } 1621 for (j=bi[i]; j<bi[i+1]; j++) { 1622 PetscInt cowner,col = gcdest[bj[j]]; 1623 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1624 if (rowner == cowner) dnnz[i]++; 1625 else onnz[i]++; 1626 } 1627 } 1628 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1629 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1630 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1631 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1632 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1633 1634 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1635 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1636 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1637 for (i=0; i<m; i++) { 1638 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1639 PetscInt j0,rowlen; 1640 rowlen = ai[i+1] - ai[i]; 1641 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1642 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1643 ierr = 
MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1644 } 1645 rowlen = bi[i+1] - bi[i]; 1646 for (j0=j=0; j<rowlen; j0=j) { 1647 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1648 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1649 } 1650 } 1651 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1652 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1653 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1654 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1655 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1656 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1657 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1658 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1659 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1660 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1661 *B = Aperm; 1662 PetscFunctionReturn(0); 1663 } 1664 1665 #undef __FUNCT__ 1666 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1667 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1668 { 1669 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1670 Mat A = mat->A,B = mat->B; 1671 PetscErrorCode ierr; 1672 PetscReal isend[5],irecv[5]; 1673 1674 PetscFunctionBegin; 1675 info->block_size = 1.0; 1676 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1677 1678 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1679 isend[3] = info->memory; isend[4] = info->mallocs; 1680 1681 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1682 1683 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1684 isend[3] += info->memory; isend[4] += info->mallocs; 1685 if (flag == MAT_LOCAL) { 1686 info->nz_used = isend[0]; 1687 info->nz_allocated = isend[1]; 1688 info->nz_unneeded = isend[2]; 1689 info->memory = 
isend[3]; 1690 info->mallocs = isend[4]; 1691 } else if (flag == MAT_GLOBAL_MAX) { 1692 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1693 1694 info->nz_used = irecv[0]; 1695 info->nz_allocated = irecv[1]; 1696 info->nz_unneeded = irecv[2]; 1697 info->memory = irecv[3]; 1698 info->mallocs = irecv[4]; 1699 } else if (flag == MAT_GLOBAL_SUM) { 1700 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1701 1702 info->nz_used = irecv[0]; 1703 info->nz_allocated = irecv[1]; 1704 info->nz_unneeded = irecv[2]; 1705 info->memory = irecv[3]; 1706 info->mallocs = irecv[4]; 1707 } 1708 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1709 info->fill_ratio_needed = 0; 1710 info->factor_mallocs = 0; 1711 PetscFunctionReturn(0); 1712 } 1713 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatSetOption_MPIAIJ" 1716 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1717 { 1718 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1719 PetscErrorCode ierr; 1720 1721 PetscFunctionBegin; 1722 switch (op) { 1723 case MAT_NEW_NONZERO_LOCATIONS: 1724 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1725 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1726 case MAT_KEEP_NONZERO_PATTERN: 1727 case MAT_NEW_NONZERO_LOCATION_ERR: 1728 case MAT_USE_INODES: 1729 case MAT_IGNORE_ZERO_ENTRIES: 1730 MatCheckPreallocated(A,1); 1731 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1732 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1733 break; 1734 case MAT_ROW_ORIENTED: 1735 a->roworiented = flg; 1736 1737 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1738 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1739 break; 1740 case MAT_NEW_DIAGONALS: 1741 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1742 break; 1743 case MAT_IGNORE_OFF_PROC_ENTRIES: 1744 a->donotstash = flg; 1745 break; 1746 case MAT_SPD: 1747 A->spd_set = PETSC_TRUE; 1748 A->spd = flg; 1749 if (flg) { 
1750 A->symmetric = PETSC_TRUE; 1751 A->structurally_symmetric = PETSC_TRUE; 1752 A->symmetric_set = PETSC_TRUE; 1753 A->structurally_symmetric_set = PETSC_TRUE; 1754 } 1755 break; 1756 case MAT_SYMMETRIC: 1757 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1758 break; 1759 case MAT_STRUCTURALLY_SYMMETRIC: 1760 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1761 break; 1762 case MAT_HERMITIAN: 1763 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1764 break; 1765 case MAT_SYMMETRY_ETERNAL: 1766 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1767 break; 1768 default: 1769 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1770 } 1771 PetscFunctionReturn(0); 1772 } 1773 1774 #undef __FUNCT__ 1775 #define __FUNCT__ "MatGetRow_MPIAIJ" 1776 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1777 { 1778 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1779 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1780 PetscErrorCode ierr; 1781 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1782 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1783 PetscInt *cmap,*idx_p; 1784 1785 PetscFunctionBegin; 1786 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1787 mat->getrowactive = PETSC_TRUE; 1788 1789 if (!mat->rowvalues && (idx || v)) { 1790 /* 1791 allocate enough space to hold information from the longest row. 
1792 */ 1793 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1794 PetscInt max = 1,tmp; 1795 for (i=0; i<matin->rmap->n; i++) { 1796 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1797 if (max < tmp) max = tmp; 1798 } 1799 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1800 } 1801 1802 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1803 lrow = row - rstart; 1804 1805 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1806 if (!v) {pvA = 0; pvB = 0;} 1807 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1808 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1809 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1810 nztot = nzA + nzB; 1811 1812 cmap = mat->garray; 1813 if (v || idx) { 1814 if (nztot) { 1815 /* Sort by increasing column numbers, assuming A and B already sorted */ 1816 PetscInt imark = -1; 1817 if (v) { 1818 *v = v_p = mat->rowvalues; 1819 for (i=0; i<nzB; i++) { 1820 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1821 else break; 1822 } 1823 imark = i; 1824 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1825 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1826 } 1827 if (idx) { 1828 *idx = idx_p = mat->rowindices; 1829 if (imark > -1) { 1830 for (i=0; i<imark; i++) { 1831 idx_p[i] = cmap[cworkB[i]]; 1832 } 1833 } else { 1834 for (i=0; i<nzB; i++) { 1835 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1836 else break; 1837 } 1838 imark = i; 1839 } 1840 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1841 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1842 } 1843 } else { 1844 if (idx) *idx = 0; 1845 if (v) *v = 0; 1846 } 1847 } 1848 *nz = nztot; 1849 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1850 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1851 PetscFunctionReturn(0); 1852 } 1853 1854 #undef 
__FUNCT__ 1855 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1856 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1857 { 1858 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1859 1860 PetscFunctionBegin; 1861 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1862 aij->getrowactive = PETSC_FALSE; 1863 PetscFunctionReturn(0); 1864 } 1865 1866 #undef __FUNCT__ 1867 #define __FUNCT__ "MatNorm_MPIAIJ" 1868 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1869 { 1870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1871 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1872 PetscErrorCode ierr; 1873 PetscInt i,j,cstart = mat->cmap->rstart; 1874 PetscReal sum = 0.0; 1875 MatScalar *v; 1876 1877 PetscFunctionBegin; 1878 if (aij->size == 1) { 1879 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1880 } else { 1881 if (type == NORM_FROBENIUS) { 1882 v = amat->a; 1883 for (i=0; i<amat->nz; i++) { 1884 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1885 } 1886 v = bmat->a; 1887 for (i=0; i<bmat->nz; i++) { 1888 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1889 } 1890 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1891 *norm = PetscSqrtReal(*norm); 1892 } else if (type == NORM_1) { /* max column norm */ 1893 PetscReal *tmp,*tmp2; 1894 PetscInt *jj,*garray = aij->garray; 1895 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1896 ierr = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1897 *norm = 0.0; 1898 v = amat->a; jj = amat->j; 1899 for (j=0; j<amat->nz; j++) { 1900 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1901 } 1902 v = bmat->a; jj = bmat->j; 1903 for (j=0; j<bmat->nz; j++) { 1904 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1905 } 1906 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1907 
for (j=0; j<mat->cmap->N; j++) { 1908 if (tmp2[j] > *norm) *norm = tmp2[j]; 1909 } 1910 ierr = PetscFree(tmp);CHKERRQ(ierr); 1911 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1912 } else if (type == NORM_INFINITY) { /* max row norm */ 1913 PetscReal ntemp = 0.0; 1914 for (j=0; j<aij->A->rmap->n; j++) { 1915 v = amat->a + amat->i[j]; 1916 sum = 0.0; 1917 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1918 sum += PetscAbsScalar(*v); v++; 1919 } 1920 v = bmat->a + bmat->i[j]; 1921 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1922 sum += PetscAbsScalar(*v); v++; 1923 } 1924 if (sum > ntemp) ntemp = sum; 1925 } 1926 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1927 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1928 } 1929 PetscFunctionReturn(0); 1930 } 1931 1932 #undef __FUNCT__ 1933 #define __FUNCT__ "MatTranspose_MPIAIJ" 1934 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1935 { 1936 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1937 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1938 PetscErrorCode ierr; 1939 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1940 PetscInt cstart = A->cmap->rstart,ncol; 1941 Mat B; 1942 MatScalar *array; 1943 1944 PetscFunctionBegin; 1945 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1946 1947 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1948 ai = Aloc->i; aj = Aloc->j; 1949 bi = Bloc->i; bj = Bloc->j; 1950 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1951 PetscInt *d_nnz,*g_nnz,*o_nnz; 1952 PetscSFNode *oloc; 1953 PETSC_UNUSED PetscSF sf; 1954 1955 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1956 /* compute d_nnz for preallocation */ 1957 ierr = 
PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1958 for (i=0; i<ai[ma]; i++) { 1959 d_nnz[aj[i]]++; 1960 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1961 } 1962 /* compute local off-diagonal contributions */ 1963 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1964 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1965 /* map those to global */ 1966 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1967 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1968 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1969 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1970 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1971 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1972 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1973 1974 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1975 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1976 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1977 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1978 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1979 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1980 } else { 1981 B = *matout; 1982 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1983 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1984 } 1985 1986 /* copy over the A part */ 1987 array = Aloc->a; 1988 row = A->rmap->rstart; 1989 for (i=0; i<ma; i++) { 1990 ncol = ai[i+1]-ai[i]; 1991 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1992 row++; 1993 array += ncol; aj += ncol; 1994 } 1995 aj = Aloc->j; 1996 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1997 1998 /* copy over the B part */ 1999 ierr = 
PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2000 array = Bloc->a; 2001 row = A->rmap->rstart; 2002 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2003 cols_tmp = cols; 2004 for (i=0; i<mb; i++) { 2005 ncol = bi[i+1]-bi[i]; 2006 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2007 row++; 2008 array += ncol; cols_tmp += ncol; 2009 } 2010 ierr = PetscFree(cols);CHKERRQ(ierr); 2011 2012 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2013 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2014 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2015 *matout = B; 2016 } else { 2017 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2018 } 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2024 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2025 { 2026 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2027 Mat a = aij->A,b = aij->B; 2028 PetscErrorCode ierr; 2029 PetscInt s1,s2,s3; 2030 2031 PetscFunctionBegin; 2032 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2033 if (rr) { 2034 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2035 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2036 /* Overlap communication with computation. 
*/ 2037 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2038 } 2039 if (ll) { 2040 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2041 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2042 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2043 } 2044 /* scale the diagonal block */ 2045 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2046 2047 if (rr) { 2048 /* Do a scatter end and then right scale the off-diagonal block */ 2049 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2050 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2051 } 2052 PetscFunctionReturn(0); 2053 } 2054 2055 #undef __FUNCT__ 2056 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2057 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2058 { 2059 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2060 PetscErrorCode ierr; 2061 2062 PetscFunctionBegin; 2063 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2064 PetscFunctionReturn(0); 2065 } 2066 2067 #undef __FUNCT__ 2068 #define __FUNCT__ "MatEqual_MPIAIJ" 2069 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2070 { 2071 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2072 Mat a,b,c,d; 2073 PetscBool flg; 2074 PetscErrorCode ierr; 2075 2076 PetscFunctionBegin; 2077 a = matA->A; b = matA->B; 2078 c = matB->A; d = matB->B; 2079 2080 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2081 if (flg) { 2082 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2083 } 2084 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2085 PetscFunctionReturn(0); 2086 } 2087 2088 #undef __FUNCT__ 2089 #define __FUNCT__ "MatCopy_MPIAIJ" 2090 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2091 { 2092 PetscErrorCode ierr; 2093 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2094 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2095 2096 PetscFunctionBegin; 2097 /* If the two matrices 
don't have the same copy implementation, they aren't compatible for fast copy. */ 2098 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2099 /* because of the column compression in the off-processor part of the matrix a->B, 2100 the number of columns in a->B and b->B may be different, hence we cannot call 2101 the MatCopy() directly on the two parts. If need be, we can provide a more 2102 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2103 then copying the submatrices */ 2104 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2105 } else { 2106 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2107 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2108 } 2109 PetscFunctionReturn(0); 2110 } 2111 2112 #undef __FUNCT__ 2113 #define __FUNCT__ "MatSetUp_MPIAIJ" 2114 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2115 { 2116 PetscErrorCode ierr; 2117 2118 PetscFunctionBegin; 2119 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2120 PetscFunctionReturn(0); 2121 } 2122 2123 /* 2124 Computes the number of nonzeros per row needed for preallocation when X and Y 2125 have different nonzero structure. 
2126 */ 2127 #undef __FUNCT__ 2128 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2129 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2130 { 2131 PetscInt i,j,k,nzx,nzy; 2132 2133 PetscFunctionBegin; 2134 /* Set the number of nonzeros in the new matrix */ 2135 for (i=0; i<m; i++) { 2136 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2137 nzx = xi[i+1] - xi[i]; 2138 nzy = yi[i+1] - yi[i]; 2139 nnz[i] = 0; 2140 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2141 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2142 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2143 nnz[i]++; 2144 } 2145 for (; k<nzy; k++) nnz[i]++; 2146 } 2147 PetscFunctionReturn(0); 2148 } 2149 2150 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2153 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2154 { 2155 PetscErrorCode ierr; 2156 PetscInt m = Y->rmap->N; 2157 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2158 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2159 2160 PetscFunctionBegin; 2161 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2162 PetscFunctionReturn(0); 2163 } 2164 2165 #undef __FUNCT__ 2166 #define __FUNCT__ "MatAXPY_MPIAIJ" 2167 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2168 { 2169 PetscErrorCode ierr; 2170 PetscInt i; 2171 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2172 PetscBLASInt bnz,one=1; 2173 Mat_SeqAIJ *x,*y; 2174 2175 PetscFunctionBegin; 2176 if (str == SAME_NONZERO_PATTERN) { 2177 PetscScalar alpha = a; 2178 x = (Mat_SeqAIJ*)xx->A->data; 2179 ierr = 
PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2180 y = (Mat_SeqAIJ*)yy->A->data; 2181 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2182 x = (Mat_SeqAIJ*)xx->B->data; 2183 y = (Mat_SeqAIJ*)yy->B->data; 2184 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2185 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2186 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2187 } else if (str == SUBSET_NONZERO_PATTERN) { 2188 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2189 2190 x = (Mat_SeqAIJ*)xx->B->data; 2191 y = (Mat_SeqAIJ*)yy->B->data; 2192 if (y->xtoy && y->XtoY != xx->B) { 2193 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2194 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2195 } 2196 if (!y->xtoy) { /* get xtoy */ 2197 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2198 y->XtoY = xx->B; 2199 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2200 } 2201 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2202 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2203 } else { 2204 Mat B; 2205 PetscInt *nnz_d,*nnz_o; 2206 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2207 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2208 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2209 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2210 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2211 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2212 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2213 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2214 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2215 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2216 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2217 
ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2218 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2219 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2220 } 2221 PetscFunctionReturn(0); 2222 } 2223 2224 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2225 2226 #undef __FUNCT__ 2227 #define __FUNCT__ "MatConjugate_MPIAIJ" 2228 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2229 { 2230 #if defined(PETSC_USE_COMPLEX) 2231 PetscErrorCode ierr; 2232 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2233 2234 PetscFunctionBegin; 2235 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2236 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2237 #else 2238 PetscFunctionBegin; 2239 #endif 2240 PetscFunctionReturn(0); 2241 } 2242 2243 #undef __FUNCT__ 2244 #define __FUNCT__ "MatRealPart_MPIAIJ" 2245 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2246 { 2247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2248 PetscErrorCode ierr; 2249 2250 PetscFunctionBegin; 2251 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2252 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2258 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2259 { 2260 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2265 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2266 PetscFunctionReturn(0); 2267 } 2268 2269 #if defined(PETSC_HAVE_PBGL) 2270 2271 #include <boost/parallel/mpi/bsp_process_group.hpp> 2272 #include <boost/graph/distributed/ilu_default_graph.hpp> 2273 #include <boost/graph/distributed/ilu_0_block.hpp> 2274 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2275 #include <boost/graph/distributed/petsc/interface.hpp> 2276 #include <boost/multi_array.hpp> 2277 #include <boost/parallel/distributed_property_map->hpp> 2278 2279 #undef __FUNCT__ 2280 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2281 /* 2282 This uses the parallel ILU factorization of Peter 
Gottschling <pgottsch@osl.iu.edu> 2283 */ 2284 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2285 { 2286 namespace petsc = boost::distributed::petsc; 2287 2288 namespace graph_dist = boost::graph::distributed; 2289 using boost::graph::distributed::ilu_default::process_group_type; 2290 using boost::graph::ilu_permuted; 2291 2292 PetscBool row_identity, col_identity; 2293 PetscContainer c; 2294 PetscInt m, n, M, N; 2295 PetscErrorCode ierr; 2296 2297 PetscFunctionBegin; 2298 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2299 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2300 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2301 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2302 2303 process_group_type pg; 2304 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2305 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2306 lgraph_type& level_graph = *lgraph_p; 2307 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2308 2309 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2310 ilu_permuted(level_graph); 2311 2312 /* put together the new matrix */ 2313 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2314 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2315 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2316 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2317 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2318 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2319 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2320 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2321 2322 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2323 ierr = 
PetscContainerSetPointer(c, lgraph_p); 2324 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2325 ierr = PetscContainerDestroy(&c); 2326 PetscFunctionReturn(0); 2327 } 2328 2329 #undef __FUNCT__ 2330 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2331 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2332 { 2333 PetscFunctionBegin; 2334 PetscFunctionReturn(0); 2335 } 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "MatSolve_MPIAIJ" 2339 /* 2340 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2341 */ 2342 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2343 { 2344 namespace graph_dist = boost::graph::distributed; 2345 2346 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2347 lgraph_type *lgraph_p; 2348 PetscContainer c; 2349 PetscErrorCode ierr; 2350 2351 PetscFunctionBegin; 2352 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2353 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2354 ierr = VecCopy(b, x);CHKERRQ(ierr); 2355 2356 PetscScalar *array_x; 2357 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2358 PetscInt sx; 2359 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2360 2361 PetscScalar *array_b; 2362 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2363 PetscInt sb; 2364 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2365 2366 lgraph_type& level_graph = *lgraph_p; 2367 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2368 2369 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2370 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2371 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2372 2373 typedef boost::iterator_property_map<array_ref_type::iterator, 2374 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2375 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2376 
gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2377 2378 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2379 PetscFunctionReturn(0); 2380 } 2381 #endif 2382 2383 2384 #undef __FUNCT__ 2385 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2386 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2387 { 2388 PetscMPIInt rank,size; 2389 MPI_Comm comm; 2390 PetscErrorCode ierr; 2391 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2392 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2393 PetscInt *rowrange = mat->rmap->range; 2394 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2395 Mat A = aij->A,B=aij->B,C=*matredundant; 2396 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2397 PetscScalar *sbuf_a; 2398 PetscInt nzlocal=a->nz+b->nz; 2399 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2400 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2401 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2402 MatScalar *aworkA,*aworkB; 2403 PetscScalar *vals; 2404 PetscMPIInt tag1,tag2,tag3,imdex; 2405 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2406 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2407 MPI_Status recv_status,*send_status; 2408 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2409 PetscInt **rbuf_j=NULL; 2410 PetscScalar **rbuf_a=NULL; 2411 Mat_Redundant *redund =NULL; 2412 2413 PetscFunctionBegin; 2414 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2415 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2416 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2417 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2418 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2419 2420 if (reuse == MAT_REUSE_MATRIX) { 2421 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot 
reuse matrix. Wrong global size"); 2422 if (subsize == 1) { 2423 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2424 redund = c->redundant; 2425 } else { 2426 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2427 redund = c->redundant; 2428 } 2429 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2430 2431 nsends = redund->nsends; 2432 nrecvs = redund->nrecvs; 2433 send_rank = redund->send_rank; 2434 recv_rank = redund->recv_rank; 2435 sbuf_nz = redund->sbuf_nz; 2436 rbuf_nz = redund->rbuf_nz; 2437 sbuf_j = redund->sbuf_j; 2438 sbuf_a = redund->sbuf_a; 2439 rbuf_j = redund->rbuf_j; 2440 rbuf_a = redund->rbuf_a; 2441 } 2442 2443 if (reuse == MAT_INITIAL_MATRIX) { 2444 PetscInt nleftover,np_subcomm; 2445 2446 /* get the destination processors' id send_rank, nsends and nrecvs */ 2447 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2448 2449 np_subcomm = size/nsubcomm; 2450 nleftover = size - nsubcomm*np_subcomm; 2451 2452 /* block of codes below is specific for INTERLACED */ 2453 /* ------------------------------------------------*/ 2454 nsends = 0; nrecvs = 0; 2455 for (i=0; i<size; i++) { 2456 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2457 send_rank[nsends++] = i; 2458 recv_rank[nrecvs++] = i; 2459 } 2460 } 2461 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2462 i = size-nleftover-1; 2463 j = 0; 2464 while (j < nsubcomm - nleftover) { 2465 send_rank[nsends++] = i; 2466 i--; j++; 2467 } 2468 } 2469 2470 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2471 for (i=0; i<nleftover; i++) { 2472 recv_rank[nrecvs++] = size-nleftover+i; 2473 } 2474 } 2475 /*----------------------------------------------*/ 2476 2477 /* allocate sbuf_j, sbuf_a */ 2478 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2479 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2480 ierr = 
PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2481 /* 2482 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2483 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2484 */ 2485 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2486 2487 /* copy mat's local entries into the buffers */ 2488 if (reuse == MAT_INITIAL_MATRIX) { 2489 rownz_max = 0; 2490 rptr = sbuf_j; 2491 cols = sbuf_j + rend-rstart + 1; 2492 vals = sbuf_a; 2493 rptr[0] = 0; 2494 for (i=0; i<rend-rstart; i++) { 2495 row = i + rstart; 2496 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2497 ncols = nzA + nzB; 2498 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2499 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2500 /* load the column indices for this row into cols */ 2501 lwrite = 0; 2502 for (l=0; l<nzB; l++) { 2503 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2504 vals[lwrite] = aworkB[l]; 2505 cols[lwrite++] = ctmp; 2506 } 2507 } 2508 for (l=0; l<nzA; l++) { 2509 vals[lwrite] = aworkA[l]; 2510 cols[lwrite++] = cstart + cworkA[l]; 2511 } 2512 for (l=0; l<nzB; l++) { 2513 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2514 vals[lwrite] = aworkB[l]; 2515 cols[lwrite++] = ctmp; 2516 } 2517 } 2518 vals += ncols; 2519 cols += ncols; 2520 rptr[i+1] = rptr[i] + ncols; 2521 if (rownz_max < ncols) rownz_max = ncols; 2522 } 2523 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2524 } else { /* only copy matrix values into sbuf_a */ 2525 rptr = sbuf_j; 2526 vals = sbuf_a; 2527 rptr[0] = 0; 2528 for (i=0; i<rend-rstart; i++) { 2529 row = i + rstart; 2530 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2531 ncols = nzA + nzB; 2532 cworkB = b->j + b->i[i]; 2533 aworkA = a->a + a->i[i]; 2534 aworkB = b->a + b->i[i]; 2535 lwrite = 0; 2536 for (l=0; l<nzB; l++) { 2537 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2538 } 2539 
for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2540 for (l=0; l<nzB; l++) { 2541 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2542 } 2543 vals += ncols; 2544 rptr[i+1] = rptr[i] + ncols; 2545 } 2546 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2547 2548 /* send nzlocal to others, and recv other's nzlocal */ 2549 /*--------------------------------------------------*/ 2550 if (reuse == MAT_INITIAL_MATRIX) { 2551 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2552 2553 s_waits2 = s_waits3 + nsends; 2554 s_waits1 = s_waits2 + nsends; 2555 r_waits1 = s_waits1 + nsends; 2556 r_waits2 = r_waits1 + nrecvs; 2557 r_waits3 = r_waits2 + nrecvs; 2558 } else { 2559 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2560 2561 r_waits3 = s_waits3 + nsends; 2562 } 2563 2564 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2565 if (reuse == MAT_INITIAL_MATRIX) { 2566 /* get new tags to keep the communication clean */ 2567 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2568 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2569 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2570 2571 /* post receives of other's nzlocal */ 2572 for (i=0; i<nrecvs; i++) { 2573 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2574 } 2575 /* send nzlocal to others */ 2576 for (i=0; i<nsends; i++) { 2577 sbuf_nz[i] = nzlocal; 2578 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2579 } 2580 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2581 count = nrecvs; 2582 while (count) { 2583 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2584 2585 recv_rank[imdex] = recv_status.MPI_SOURCE; 2586 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2587 ierr = 
PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2588 2589 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2590 2591 rbuf_nz[imdex] += i + 2; 2592 2593 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2594 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2595 count--; 2596 } 2597 /* wait on sends of nzlocal */ 2598 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2599 /* send mat->i,j to others, and recv from other's */ 2600 /*------------------------------------------------*/ 2601 for (i=0; i<nsends; i++) { 2602 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2603 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2604 } 2605 /* wait on receives of mat->i,j */ 2606 /*------------------------------*/ 2607 count = nrecvs; 2608 while (count) { 2609 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2610 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2611 count--; 2612 } 2613 /* wait on sends of mat->i,j */ 2614 /*---------------------------*/ 2615 if (nsends) { 2616 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2617 } 2618 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2619 2620 /* post receives, send and receive mat->a */ 2621 /*----------------------------------------*/ 2622 for (imdex=0; imdex<nrecvs; imdex++) { 2623 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2624 } 2625 for (i=0; i<nsends; i++) { 2626 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2627 } 2628 count = nrecvs; 2629 while (count) { 2630 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2631 if 
(recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2632 count--; 2633 } 2634 if (nsends) { 2635 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2636 } 2637 2638 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2639 2640 /* create redundant matrix */ 2641 /*-------------------------*/ 2642 if (reuse == MAT_INITIAL_MATRIX) { 2643 const PetscInt *range; 2644 PetscInt rstart_sub,rend_sub,mloc_sub; 2645 2646 /* compute rownz_max for preallocation */ 2647 for (imdex=0; imdex<nrecvs; imdex++) { 2648 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2649 rptr = rbuf_j[imdex]; 2650 for (i=0; i<j; i++) { 2651 ncols = rptr[i+1] - rptr[i]; 2652 if (rownz_max < ncols) rownz_max = ncols; 2653 } 2654 } 2655 2656 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2657 2658 /* get local size of redundant matrix 2659 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! 
*/ 2660 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2661 rstart_sub = range[nsubcomm*subrank]; 2662 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2663 rend_sub = range[nsubcomm*(subrank+1)]; 2664 } else { 2665 rend_sub = mat->rmap->N; 2666 } 2667 mloc_sub = rend_sub - rstart_sub; 2668 2669 if (M == N) { 2670 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2671 } else { /* non-square matrix */ 2672 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2673 } 2674 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2675 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2676 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2677 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2678 } else { 2679 C = *matredundant; 2680 } 2681 2682 /* insert local matrix entries */ 2683 rptr = sbuf_j; 2684 cols = sbuf_j + rend-rstart + 1; 2685 vals = sbuf_a; 2686 for (i=0; i<rend-rstart; i++) { 2687 row = i + rstart; 2688 ncols = rptr[i+1] - rptr[i]; 2689 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2690 vals += ncols; 2691 cols += ncols; 2692 } 2693 /* insert received matrix entries */ 2694 for (imdex=0; imdex<nrecvs; imdex++) { 2695 rstart = rowrange[recv_rank[imdex]]; 2696 rend = rowrange[recv_rank[imdex]+1]; 2697 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2698 rptr = rbuf_j[imdex]; 2699 cols = rbuf_j[imdex] + rend-rstart + 1; 2700 vals = rbuf_a[imdex]; 2701 for (i=0; i<rend-rstart; i++) { 2702 row = i + rstart; 2703 ncols = rptr[i+1] - rptr[i]; 2704 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2705 vals += ncols; 2706 cols += ncols; 2707 } 2708 } 2709 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2710 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2711 2712 if (reuse == MAT_INITIAL_MATRIX) { 2713 *matredundant = C; 2714 2715 /* create a 
supporting struct and attach it to C for reuse */ 2716 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2717 if (subsize == 1) { 2718 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2719 c->redundant = redund; 2720 } else { 2721 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2722 c->redundant = redund; 2723 } 2724 2725 redund->nzlocal = nzlocal; 2726 redund->nsends = nsends; 2727 redund->nrecvs = nrecvs; 2728 redund->send_rank = send_rank; 2729 redund->recv_rank = recv_rank; 2730 redund->sbuf_nz = sbuf_nz; 2731 redund->rbuf_nz = rbuf_nz; 2732 redund->sbuf_j = sbuf_j; 2733 redund->sbuf_a = sbuf_a; 2734 redund->rbuf_j = rbuf_j; 2735 redund->rbuf_a = rbuf_a; 2736 redund->psubcomm = NULL; 2737 } 2738 PetscFunctionReturn(0); 2739 } 2740 2741 #undef __FUNCT__ 2742 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2743 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2744 { 2745 PetscErrorCode ierr; 2746 MPI_Comm comm; 2747 PetscMPIInt size,subsize; 2748 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2749 Mat_Redundant *redund=NULL; 2750 PetscSubcomm psubcomm=NULL; 2751 MPI_Comm subcomm_in=subcomm; 2752 Mat *matseq; 2753 IS isrow,iscol; 2754 2755 PetscFunctionBegin; 2756 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2757 if (reuse == MAT_INITIAL_MATRIX) { 2758 /* create psubcomm, then get subcomm */ 2759 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2760 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2761 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2762 2763 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2764 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2765 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2766 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2767 subcomm = psubcomm->comm; 2768 } else { /* retrieve psubcomm and 
subcomm */ 2769 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2770 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2771 if (subsize == 1) { 2772 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2773 redund = c->redundant; 2774 } else { 2775 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2776 redund = c->redundant; 2777 } 2778 psubcomm = redund->psubcomm; 2779 } 2780 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2781 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2782 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */ 2783 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2784 if (subsize == 1) { 2785 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2786 c->redundant->psubcomm = psubcomm; 2787 } else { 2788 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2789 c->redundant->psubcomm = psubcomm ; 2790 } 2791 } 2792 PetscFunctionReturn(0); 2793 } 2794 } 2795 2796 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2797 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2798 if (reuse == MAT_INITIAL_MATRIX) { 2799 /* create a local sequential matrix matseq[0] */ 2800 mloc_sub = PETSC_DECIDE; 2801 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2802 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2803 rstart = rend - mloc_sub; 2804 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2805 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2806 } else { /* reuse == MAT_REUSE_MATRIX */ 2807 if (subsize == 1) { 2808 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2809 redund = c->redundant; 2810 } else { 2811 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2812 redund = c->redundant; 2813 } 2814 2815 isrow = redund->isrow; 2816 iscol = 
redund->iscol; 2817 matseq = redund->matseq; 2818 } 2819 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2820 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2821 2822 if (reuse == MAT_INITIAL_MATRIX) { 2823 /* create a supporting struct and attach it to C for reuse */ 2824 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2825 if (subsize == 1) { 2826 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2827 c->redundant = redund; 2828 } else { 2829 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2830 c->redundant = redund; 2831 } 2832 redund->isrow = isrow; 2833 redund->iscol = iscol; 2834 redund->matseq = matseq; 2835 redund->psubcomm = psubcomm; 2836 } 2837 PetscFunctionReturn(0); 2838 } 2839 2840 #undef __FUNCT__ 2841 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2842 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2843 { 2844 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2845 PetscErrorCode ierr; 2846 PetscInt i,*idxb = 0; 2847 PetscScalar *va,*vb; 2848 Vec vtmp; 2849 2850 PetscFunctionBegin; 2851 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2852 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2853 if (idx) { 2854 for (i=0; i<A->rmap->n; i++) { 2855 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2856 } 2857 } 2858 2859 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2860 if (idx) { 2861 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2862 } 2863 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2864 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2865 2866 for (i=0; i<A->rmap->n; i++) { 2867 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2868 va[i] = vb[i]; 2869 if (idx) idx[i] = a->garray[idxb[i]]; 2870 } 2871 } 2872 2873 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2874 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2875 ierr = PetscFree(idxb);CHKERRQ(ierr); 2876 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2877 
PetscFunctionReturn(0); 2878 } 2879 2880 #undef __FUNCT__ 2881 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2882 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2883 { 2884 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2885 PetscErrorCode ierr; 2886 PetscInt i,*idxb = 0; 2887 PetscScalar *va,*vb; 2888 Vec vtmp; 2889 2890 PetscFunctionBegin; 2891 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2892 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2893 if (idx) { 2894 for (i=0; i<A->cmap->n; i++) { 2895 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2896 } 2897 } 2898 2899 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2900 if (idx) { 2901 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2902 } 2903 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2904 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2905 2906 for (i=0; i<A->rmap->n; i++) { 2907 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2908 va[i] = vb[i]; 2909 if (idx) idx[i] = a->garray[idxb[i]]; 2910 } 2911 } 2912 2913 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2914 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2915 ierr = PetscFree(idxb);CHKERRQ(ierr); 2916 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2917 PetscFunctionReturn(0); 2918 } 2919 2920 #undef __FUNCT__ 2921 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2922 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2923 { 2924 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2925 PetscInt n = A->rmap->n; 2926 PetscInt cstart = A->cmap->rstart; 2927 PetscInt *cmap = mat->garray; 2928 PetscInt *diagIdx, *offdiagIdx; 2929 Vec diagV, offdiagV; 2930 PetscScalar *a, *diagA, *offdiagA; 2931 PetscInt r; 2932 PetscErrorCode ierr; 2933 2934 PetscFunctionBegin; 2935 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2936 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2937 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2938 ierr = 
MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2939 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2940 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2941 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2942 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2943 for (r = 0; r < n; ++r) { 2944 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2945 a[r] = diagA[r]; 2946 idx[r] = cstart + diagIdx[r]; 2947 } else { 2948 a[r] = offdiagA[r]; 2949 idx[r] = cmap[offdiagIdx[r]]; 2950 } 2951 } 2952 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2953 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2954 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2955 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2956 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2957 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2958 PetscFunctionReturn(0); 2959 } 2960 2961 #undef __FUNCT__ 2962 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2963 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2964 { 2965 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2966 PetscInt n = A->rmap->n; 2967 PetscInt cstart = A->cmap->rstart; 2968 PetscInt *cmap = mat->garray; 2969 PetscInt *diagIdx, *offdiagIdx; 2970 Vec diagV, offdiagV; 2971 PetscScalar *a, *diagA, *offdiagA; 2972 PetscInt r; 2973 PetscErrorCode ierr; 2974 2975 PetscFunctionBegin; 2976 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2977 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2978 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2979 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2980 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2981 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2982 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2983 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2984 for (r = 0; r < n; ++r) { 2985 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2986 a[r] = diagA[r]; 2987 idx[r] = 
cstart + diagIdx[r]; 2988 } else { 2989 a[r] = offdiagA[r]; 2990 idx[r] = cmap[offdiagIdx[r]]; 2991 } 2992 } 2993 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2994 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2995 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2996 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2997 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2998 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2999 PetscFunctionReturn(0); 3000 } 3001 3002 #undef __FUNCT__ 3003 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3004 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3005 { 3006 PetscErrorCode ierr; 3007 Mat *dummy; 3008 3009 PetscFunctionBegin; 3010 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3011 *newmat = *dummy; 3012 ierr = PetscFree(dummy);CHKERRQ(ierr); 3013 PetscFunctionReturn(0); 3014 } 3015 3016 #undef __FUNCT__ 3017 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3018 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3019 { 3020 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3021 PetscErrorCode ierr; 3022 3023 PetscFunctionBegin; 3024 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3025 PetscFunctionReturn(0); 3026 } 3027 3028 #undef __FUNCT__ 3029 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3030 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3031 { 3032 PetscErrorCode ierr; 3033 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3034 3035 PetscFunctionBegin; 3036 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3037 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3038 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3039 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3040 PetscFunctionReturn(0); 3041 } 3042 3043 /* -------------------------------------------------------------------*/ 3044 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3045 MatGetRow_MPIAIJ, 3046 
MatRestoreRow_MPIAIJ, 3047 MatMult_MPIAIJ, 3048 /* 4*/ MatMultAdd_MPIAIJ, 3049 MatMultTranspose_MPIAIJ, 3050 MatMultTransposeAdd_MPIAIJ, 3051 #if defined(PETSC_HAVE_PBGL) 3052 MatSolve_MPIAIJ, 3053 #else 3054 0, 3055 #endif 3056 0, 3057 0, 3058 /*10*/ 0, 3059 0, 3060 0, 3061 MatSOR_MPIAIJ, 3062 MatTranspose_MPIAIJ, 3063 /*15*/ MatGetInfo_MPIAIJ, 3064 MatEqual_MPIAIJ, 3065 MatGetDiagonal_MPIAIJ, 3066 MatDiagonalScale_MPIAIJ, 3067 MatNorm_MPIAIJ, 3068 /*20*/ MatAssemblyBegin_MPIAIJ, 3069 MatAssemblyEnd_MPIAIJ, 3070 MatSetOption_MPIAIJ, 3071 MatZeroEntries_MPIAIJ, 3072 /*24*/ MatZeroRows_MPIAIJ, 3073 0, 3074 #if defined(PETSC_HAVE_PBGL) 3075 0, 3076 #else 3077 0, 3078 #endif 3079 0, 3080 0, 3081 /*29*/ MatSetUp_MPIAIJ, 3082 #if defined(PETSC_HAVE_PBGL) 3083 0, 3084 #else 3085 0, 3086 #endif 3087 0, 3088 0, 3089 0, 3090 /*34*/ MatDuplicate_MPIAIJ, 3091 0, 3092 0, 3093 0, 3094 0, 3095 /*39*/ MatAXPY_MPIAIJ, 3096 MatGetSubMatrices_MPIAIJ, 3097 MatIncreaseOverlap_MPIAIJ, 3098 MatGetValues_MPIAIJ, 3099 MatCopy_MPIAIJ, 3100 /*44*/ MatGetRowMax_MPIAIJ, 3101 MatScale_MPIAIJ, 3102 0, 3103 0, 3104 MatZeroRowsColumns_MPIAIJ, 3105 /*49*/ MatSetRandom_MPIAIJ, 3106 0, 3107 0, 3108 0, 3109 0, 3110 /*54*/ MatFDColoringCreate_MPIXAIJ, 3111 0, 3112 MatSetUnfactored_MPIAIJ, 3113 MatPermute_MPIAIJ, 3114 0, 3115 /*59*/ MatGetSubMatrix_MPIAIJ, 3116 MatDestroy_MPIAIJ, 3117 MatView_MPIAIJ, 3118 0, 3119 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3120 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3121 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3122 0, 3123 0, 3124 0, 3125 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3126 MatGetRowMinAbs_MPIAIJ, 3127 0, 3128 MatSetColoring_MPIAIJ, 3129 0, 3130 MatSetValuesAdifor_MPIAIJ, 3131 /*75*/ MatFDColoringApply_AIJ, 3132 0, 3133 0, 3134 0, 3135 MatFindZeroDiagonals_MPIAIJ, 3136 /*80*/ 0, 3137 0, 3138 0, 3139 /*83*/ MatLoad_MPIAIJ, 3140 0, 3141 0, 3142 0, 3143 0, 3144 0, 3145 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3146 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3147 
MatMatMultNumeric_MPIAIJ_MPIAIJ, 3148 MatPtAP_MPIAIJ_MPIAIJ, 3149 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3150 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3151 0, 3152 0, 3153 0, 3154 0, 3155 /*99*/ 0, 3156 0, 3157 0, 3158 MatConjugate_MPIAIJ, 3159 0, 3160 /*104*/MatSetValuesRow_MPIAIJ, 3161 MatRealPart_MPIAIJ, 3162 MatImaginaryPart_MPIAIJ, 3163 0, 3164 0, 3165 /*109*/0, 3166 MatGetRedundantMatrix_MPIAIJ, 3167 MatGetRowMin_MPIAIJ, 3168 0, 3169 0, 3170 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3171 0, 3172 0, 3173 0, 3174 0, 3175 /*119*/0, 3176 0, 3177 0, 3178 0, 3179 MatGetMultiProcBlock_MPIAIJ, 3180 /*124*/MatFindNonzeroRows_MPIAIJ, 3181 MatGetColumnNorms_MPIAIJ, 3182 MatInvertBlockDiagonal_MPIAIJ, 3183 0, 3184 MatGetSubMatricesParallel_MPIAIJ, 3185 /*129*/0, 3186 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3187 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3188 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3189 0, 3190 /*134*/0, 3191 0, 3192 0, 3193 0, 3194 0, 3195 /*139*/0, 3196 0, 3197 0, 3198 MatFDColoringSetUp_MPIXAIJ 3199 }; 3200 3201 /* ----------------------------------------------------------------------------------------*/ 3202 3203 #undef __FUNCT__ 3204 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3205 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3206 { 3207 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3208 PetscErrorCode ierr; 3209 3210 PetscFunctionBegin; 3211 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3212 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3213 PetscFunctionReturn(0); 3214 } 3215 3216 #undef __FUNCT__ 3217 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3218 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3219 { 3220 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3221 PetscErrorCode ierr; 3222 3223 PetscFunctionBegin; 3224 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3225 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3226 PetscFunctionReturn(0); 3227 } 3228 3229 #undef __FUNCT__ 3230 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3231 PetscErrorCode 
MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3232 { 3233 Mat_MPIAIJ *b; 3234 PetscErrorCode ierr; 3235 3236 PetscFunctionBegin; 3237 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3238 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3239 b = (Mat_MPIAIJ*)B->data; 3240 3241 if (!B->preallocated) { 3242 /* Explicitly create 2 MATSEQAIJ matrices. */ 3243 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3244 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3245 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3246 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3247 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3248 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3249 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3250 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3251 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3252 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3253 } 3254 3255 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3256 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3257 B->preallocated = PETSC_TRUE; 3258 PetscFunctionReturn(0); 3259 } 3260 3261 #undef __FUNCT__ 3262 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3263 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3264 { 3265 Mat mat; 3266 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3267 PetscErrorCode ierr; 3268 3269 PetscFunctionBegin; 3270 *newmat = 0; 3271 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3272 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3273 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3274 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3275 ierr = 
PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3276 a = (Mat_MPIAIJ*)mat->data; 3277 3278 mat->factortype = matin->factortype; 3279 mat->assembled = PETSC_TRUE; 3280 mat->insertmode = NOT_SET_VALUES; 3281 mat->preallocated = PETSC_TRUE; 3282 3283 a->size = oldmat->size; 3284 a->rank = oldmat->rank; 3285 a->donotstash = oldmat->donotstash; 3286 a->roworiented = oldmat->roworiented; 3287 a->rowindices = 0; 3288 a->rowvalues = 0; 3289 a->getrowactive = PETSC_FALSE; 3290 3291 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3292 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3293 3294 if (oldmat->colmap) { 3295 #if defined(PETSC_USE_CTABLE) 3296 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3297 #else 3298 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3299 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3300 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3301 #endif 3302 } else a->colmap = 0; 3303 if (oldmat->garray) { 3304 PetscInt len; 3305 len = oldmat->B->cmap->n; 3306 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3307 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3308 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3309 } else a->garray = 0; 3310 3311 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3312 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3313 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3314 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3315 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3316 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3317 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3318 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3319 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3320 *newmat = mat; 3321 PetscFunctionReturn(0); 3322 } 3323 3324 3325 3326 #undef __FUNCT__ 3327 #define __FUNCT__ "MatLoad_MPIAIJ" 3328 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3329 { 3330 PetscScalar *vals,*svals; 3331 MPI_Comm comm; 3332 PetscErrorCode ierr; 3333 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3334 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3335 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3336 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3337 PetscInt cend,cstart,n,*rowners,sizesset=1; 3338 int fd; 3339 PetscInt bs = newMat->rmap->bs; 3340 3341 PetscFunctionBegin; 3342 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3343 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3344 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3345 if (!rank) { 3346 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3347 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3348 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3349 } 3350 3351 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3352 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3353 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3354 if (bs < 0) bs = 1; 3355 3356 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3357 3358 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3359 M = header[1]; N = header[2]; 3360 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3361 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 
3362 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3363 3364 /* If global sizes are set, check if they are consistent with that given in the file */ 3365 if (sizesset) { 3366 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3367 } 3368 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3369 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3370 3371 /* determine ownership of all (block) rows */ 3372 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3373 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3374 else m = newMat->rmap->n; /* Set by user */ 3375 3376 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3377 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3378 3379 /* First process needs enough room for process with most rows */ 3380 if (!rank) { 3381 mmax = rowners[1]; 3382 for (i=2; i<=size; i++) { 3383 mmax = PetscMax(mmax, rowners[i]); 3384 } 3385 } else mmax = -1; /* unused, but compilers complain */ 3386 3387 rowners[0] = 0; 3388 for (i=2; i<=size; i++) { 3389 rowners[i] += rowners[i-1]; 3390 } 3391 rstart = rowners[rank]; 3392 rend = rowners[rank+1]; 3393 3394 /* distribute row lengths to all processors */ 3395 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3396 if (!rank) { 3397 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3398 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3399 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3400 for (j=0; j<m; j++) { 3401 procsnz[0] += ourlens[j]; 3402 } 3403 for (i=1; i<size; i++) { 3404 ierr = 
PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3405 /* calculate the number of nonzeros on each processor */ 3406 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3407 procsnz[i] += rowlengths[j]; 3408 } 3409 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3410 } 3411 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3412 } else { 3413 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3414 } 3415 3416 if (!rank) { 3417 /* determine max buffer needed and allocate it */ 3418 maxnz = 0; 3419 for (i=0; i<size; i++) { 3420 maxnz = PetscMax(maxnz,procsnz[i]); 3421 } 3422 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3423 3424 /* read in my part of the matrix column indices */ 3425 nz = procsnz[0]; 3426 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3427 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3428 3429 /* read in every one elses and ship off */ 3430 for (i=1; i<size; i++) { 3431 nz = procsnz[i]; 3432 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3433 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3434 } 3435 ierr = PetscFree(cols);CHKERRQ(ierr); 3436 } else { 3437 /* determine buffer space needed for message */ 3438 nz = 0; 3439 for (i=0; i<m; i++) { 3440 nz += ourlens[i]; 3441 } 3442 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3443 3444 /* receive message of column indices*/ 3445 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3446 } 3447 3448 /* determine column ownership if matrix is not square */ 3449 if (N != M) { 3450 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3451 else n = newMat->cmap->n; 3452 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3453 cstart = cend - n; 3454 } else { 3455 cstart = rstart; 3456 cend = rend; 3457 n = cend - cstart; 3458 } 3459 3460 /* loop over local rows, determining number of off diagonal entries */ 3461 ierr = 
PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3462 jj = 0; 3463 for (i=0; i<m; i++) { 3464 for (j=0; j<ourlens[i]; j++) { 3465 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3466 jj++; 3467 } 3468 } 3469 3470 for (i=0; i<m; i++) { 3471 ourlens[i] -= offlens[i]; 3472 } 3473 if (!sizesset) { 3474 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3475 } 3476 3477 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3478 3479 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3480 3481 for (i=0; i<m; i++) { 3482 ourlens[i] += offlens[i]; 3483 } 3484 3485 if (!rank) { 3486 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3487 3488 /* read in my part of the matrix numerical values */ 3489 nz = procsnz[0]; 3490 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3491 3492 /* insert into matrix */ 3493 jj = rstart; 3494 smycols = mycols; 3495 svals = vals; 3496 for (i=0; i<m; i++) { 3497 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3498 smycols += ourlens[i]; 3499 svals += ourlens[i]; 3500 jj++; 3501 } 3502 3503 /* read in other processors and ship out */ 3504 for (i=1; i<size; i++) { 3505 nz = procsnz[i]; 3506 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3507 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3508 } 3509 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3510 } else { 3511 /* receive numeric values */ 3512 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3513 3514 /* receive message of values*/ 3515 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3516 3517 /* insert into matrix */ 3518 jj = rstart; 3519 smycols = mycols; 3520 svals = vals; 3521 for (i=0; i<m; i++) { 3522 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3523 smycols += ourlens[i]; 3524 svals += ourlens[i]; 3525 jj++; 3526 } 3527 } 
3528 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3529 ierr = PetscFree(vals);CHKERRQ(ierr); 3530 ierr = PetscFree(mycols);CHKERRQ(ierr); 3531 ierr = PetscFree(rowners);CHKERRQ(ierr); 3532 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3533 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3534 PetscFunctionReturn(0); 3535 } 3536 3537 #undef __FUNCT__ 3538 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3539 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3540 { 3541 PetscErrorCode ierr; 3542 IS iscol_local; 3543 PetscInt csize; 3544 3545 PetscFunctionBegin; 3546 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3547 if (call == MAT_REUSE_MATRIX) { 3548 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3549 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3550 } else { 3551 PetscInt cbs; 3552 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3553 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3554 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3555 } 3556 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3557 if (call == MAT_INITIAL_MATRIX) { 3558 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3559 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3560 } 3561 PetscFunctionReturn(0); 3562 } 3563 3564 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3565 #undef __FUNCT__ 3566 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3567 /* 3568 Not great since it makes two copies of the submatrix, first an SeqAIJ 3569 in local and then by concatenating the local matrices the end result. 
3570 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3571 3572 Note: This requires a sequential iscol with all indices. 3573 */ 3574 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3575 { 3576 PetscErrorCode ierr; 3577 PetscMPIInt rank,size; 3578 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3579 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3580 PetscBool allcolumns, colflag; 3581 Mat M,Mreuse; 3582 MatScalar *vwork,*aa; 3583 MPI_Comm comm; 3584 Mat_SeqAIJ *aij; 3585 3586 PetscFunctionBegin; 3587 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3588 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3589 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3590 3591 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3592 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3593 if (colflag && ncol == mat->cmap->N) { 3594 allcolumns = PETSC_TRUE; 3595 } else { 3596 allcolumns = PETSC_FALSE; 3597 } 3598 if (call == MAT_REUSE_MATRIX) { 3599 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3600 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3601 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3602 } else { 3603 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3604 } 3605 3606 /* 3607 m - number of local rows 3608 n - number of columns (same on all processors) 3609 rstart - first row in new global matrix generated 3610 */ 3611 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3612 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3613 if (call == MAT_INITIAL_MATRIX) { 3614 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3615 ii = aij->i; 3616 jj = aij->j; 3617 3618 /* 3619 Determine the number of non-zeros in the diagonal and 
off-diagonal 3620 portions of the matrix in order to do correct preallocation 3621 */ 3622 3623 /* first get start and end of "diagonal" columns */ 3624 if (csize == PETSC_DECIDE) { 3625 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3626 if (mglobal == n) { /* square matrix */ 3627 nlocal = m; 3628 } else { 3629 nlocal = n/size + ((n % size) > rank); 3630 } 3631 } else { 3632 nlocal = csize; 3633 } 3634 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3635 rstart = rend - nlocal; 3636 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3637 3638 /* next, compute all the lengths */ 3639 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3640 olens = dlens + m; 3641 for (i=0; i<m; i++) { 3642 jend = ii[i+1] - ii[i]; 3643 olen = 0; 3644 dlen = 0; 3645 for (j=0; j<jend; j++) { 3646 if (*jj < rstart || *jj >= rend) olen++; 3647 else dlen++; 3648 jj++; 3649 } 3650 olens[i] = olen; 3651 dlens[i] = dlen; 3652 } 3653 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3654 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3655 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3656 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3657 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3658 ierr = PetscFree(dlens);CHKERRQ(ierr); 3659 } else { 3660 PetscInt ml,nl; 3661 3662 M = *newmat; 3663 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3664 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3665 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3666 /* 3667 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3668 rather than the slower MatSetValues(). 
3669 */ 3670 M->was_assembled = PETSC_TRUE; 3671 M->assembled = PETSC_FALSE; 3672 } 3673 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3674 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3675 ii = aij->i; 3676 jj = aij->j; 3677 aa = aij->a; 3678 for (i=0; i<m; i++) { 3679 row = rstart + i; 3680 nz = ii[i+1] - ii[i]; 3681 cwork = jj; jj += nz; 3682 vwork = aa; aa += nz; 3683 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3684 } 3685 3686 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3687 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3688 *newmat = M; 3689 3690 /* save submatrix used in processor for next request */ 3691 if (call == MAT_INITIAL_MATRIX) { 3692 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3693 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3694 } 3695 PetscFunctionReturn(0); 3696 } 3697 3698 #undef __FUNCT__ 3699 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3700 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3701 { 3702 PetscInt m,cstart, cend,j,nnz,i,d; 3703 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3704 const PetscInt *JJ; 3705 PetscScalar *values; 3706 PetscErrorCode ierr; 3707 3708 PetscFunctionBegin; 3709 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3710 3711 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3712 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3713 m = B->rmap->n; 3714 cstart = B->cmap->rstart; 3715 cend = B->cmap->rend; 3716 rstart = B->rmap->rstart; 3717 3718 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3719 3720 #if defined(PETSC_USE_DEBUGGING) 3721 for (i=0; i<m; i++) { 3722 nnz = Ii[i+1]- Ii[i]; 3723 JJ = J + Ii[i]; 3724 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3725 if (nnz && (JJ[0] < 0)) 
SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3726 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3727 } 3728 #endif 3729 3730 for (i=0; i<m; i++) { 3731 nnz = Ii[i+1]- Ii[i]; 3732 JJ = J + Ii[i]; 3733 nnz_max = PetscMax(nnz_max,nnz); 3734 d = 0; 3735 for (j=0; j<nnz; j++) { 3736 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3737 } 3738 d_nnz[i] = d; 3739 o_nnz[i] = nnz - d; 3740 } 3741 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3742 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3743 3744 if (v) values = (PetscScalar*)v; 3745 else { 3746 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3747 } 3748 3749 for (i=0; i<m; i++) { 3750 ii = i + rstart; 3751 nnz = Ii[i+1]- Ii[i]; 3752 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3753 } 3754 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3755 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3756 3757 if (!v) { 3758 ierr = PetscFree(values);CHKERRQ(ierr); 3759 } 3760 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3761 PetscFunctionReturn(0); 3762 } 3763 3764 #undef __FUNCT__ 3765 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3766 /*@ 3767 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3768 (the default parallel PETSc format). 3769 3770 Collective on MPI_Comm 3771 3772 Input Parameters: 3773 + B - the matrix 3774 . i - the indices into j for the start of each local row (starts with zero) 3775 . 
j - the column indices for each local row (starts with zero) 3776 - v - optional values in the matrix 3777 3778 Level: developer 3779 3780 Notes: 3781 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3782 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3783 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3784 3785 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3786 3787 The format which is used for the sparse matrix input, is equivalent to a 3788 row-major ordering.. i.e for the following matrix, the input data expected is 3789 as shown: 3790 3791 1 0 0 3792 2 0 3 P0 3793 ------- 3794 4 5 6 P1 3795 3796 Process0 [P0]: rows_owned=[0,1] 3797 i = {0,1,3} [size = nrow+1 = 2+1] 3798 j = {0,0,2} [size = nz = 6] 3799 v = {1,2,3} [size = nz = 6] 3800 3801 Process1 [P1]: rows_owned=[2] 3802 i = {0,3} [size = nrow+1 = 1+1] 3803 j = {0,1,2} [size = nz = 6] 3804 v = {4,5,6} [size = nz = 6] 3805 3806 .keywords: matrix, aij, compressed row, sparse, parallel 3807 3808 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3809 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3810 @*/ 3811 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3812 { 3813 PetscErrorCode ierr; 3814 3815 PetscFunctionBegin; 3816 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3817 PetscFunctionReturn(0); 3818 } 3819 3820 #undef __FUNCT__ 3821 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3822 /*@C 3823 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3824 (the default parallel PETSc format). 
For good matrix assembly performance 3825 the user should preallocate the matrix storage by setting the parameters 3826 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3827 performance can be increased by more than a factor of 50. 3828 3829 Collective on MPI_Comm 3830 3831 Input Parameters: 3832 + B - the matrix 3833 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3834 (same value is used for all local rows) 3835 . d_nnz - array containing the number of nonzeros in the various rows of the 3836 DIAGONAL portion of the local submatrix (possibly different for each row) 3837 or NULL, if d_nz is used to specify the nonzero structure. 3838 The size of this array is equal to the number of local rows, i.e 'm'. 3839 For matrices that will be factored, you must leave room for (and set) 3840 the diagonal entry even if it is zero. 3841 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3842 submatrix (same value is used for all local rows). 3843 - o_nnz - array containing the number of nonzeros in the various rows of the 3844 OFF-DIAGONAL portion of the local submatrix (possibly different for 3845 each row) or NULL, if o_nz is used to specify the nonzero 3846 structure. The size of this array is equal to the number 3847 of local rows, i.e 'm'. 3848 3849 If the *_nnz parameter is given then the *_nz parameter is ignored 3850 3851 The AIJ format (also called the Yale sparse matrix format or 3852 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3853 storage. The stored row and column indices begin with zero. 3854 See Users-Manual: ch_mat for details. 3855 3856 The parallel matrix is partitioned such that the first m0 rows belong to 3857 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3858 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.
   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
3945 3946 Level: intermediate 3947 3948 .keywords: matrix, aij, compressed row, sparse, parallel 3949 3950 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3951 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3952 @*/ 3953 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3954 { 3955 PetscErrorCode ierr; 3956 3957 PetscFunctionBegin; 3958 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3959 PetscValidType(B,1); 3960 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3961 PetscFunctionReturn(0); 3962 } 3963 3964 #undef __FUNCT__ 3965 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3966 /*@ 3967 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3968 CSR format the local rows. 3969 3970 Collective on MPI_Comm 3971 3972 Input Parameters: 3973 + comm - MPI communicator 3974 . m - number of local rows (Cannot be PETSC_DECIDE) 3975 . n - This value should be the same as the local size used in creating the 3976 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3977 calculated if N is given) For square matrices n is almost always m. 3978 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3979 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3980 . i - row indices 3981 . j - column indices 3982 - a - matrix values 3983 3984 Output Parameter: 3985 . mat - the matrix 3986 3987 Level: intermediate 3988 3989 Notes: 3990 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3991 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3992 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
3993 3994 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3995 3996 The format which is used for the sparse matrix input, is equivalent to a 3997 row-major ordering.. i.e for the following matrix, the input data expected is 3998 as shown: 3999 4000 1 0 0 4001 2 0 3 P0 4002 ------- 4003 4 5 6 P1 4004 4005 Process0 [P0]: rows_owned=[0,1] 4006 i = {0,1,3} [size = nrow+1 = 2+1] 4007 j = {0,0,2} [size = nz = 6] 4008 v = {1,2,3} [size = nz = 6] 4009 4010 Process1 [P1]: rows_owned=[2] 4011 i = {0,3} [size = nrow+1 = 1+1] 4012 j = {0,1,2} [size = nz = 6] 4013 v = {4,5,6} [size = nz = 6] 4014 4015 .keywords: matrix, aij, compressed row, sparse, parallel 4016 4017 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4018 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4019 @*/ 4020 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4021 { 4022 PetscErrorCode ierr; 4023 4024 PetscFunctionBegin; 4025 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4026 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4027 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4028 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4029 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4030 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4031 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4032 PetscFunctionReturn(0); 4033 } 4034 4035 #undef __FUNCT__ 4036 #define __FUNCT__ "MatCreateAIJ" 4037 /*@C 4038 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4039 (the default parallel PETSc format). 
For good matrix assembly performance 4040 the user should preallocate the matrix storage by setting the parameters 4041 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4042 performance can be increased by more than a factor of 50. 4043 4044 Collective on MPI_Comm 4045 4046 Input Parameters: 4047 + comm - MPI communicator 4048 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4049 This value should be the same as the local size used in creating the 4050 y vector for the matrix-vector product y = Ax. 4051 . n - This value should be the same as the local size used in creating the 4052 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4053 calculated if N is given) For square matrices n is almost always m. 4054 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4055 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4056 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4057 (same value is used for all local rows) 4058 . d_nnz - array containing the number of nonzeros in the various rows of the 4059 DIAGONAL portion of the local submatrix (possibly different for each row) 4060 or NULL, if d_nz is used to specify the nonzero structure. 4061 The size of this array is equal to the number of local rows, i.e 'm'. 4062 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4063 submatrix (same value is used for all local rows). 4064 - o_nnz - array containing the number of nonzeros in the various rows of the 4065 OFF-DIAGONAL portion of the local submatrix (possibly different for 4066 each row) or NULL, if o_nz is used to specify the nonzero 4067 structure. The size of this array is equal to the number 4068 of local rows, i.e 'm'. 4069 4070 Output Parameter: 4071 . 
A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.
4108 4109 For a square global matrix we define each processor's diagonal portion 4110 to be its local rows and the corresponding columns (a square submatrix); 4111 each processor's off-diagonal portion encompasses the remainder of the 4112 local matrix (a rectangular submatrix). 4113 4114 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4115 4116 When calling this routine with a single process communicator, a matrix of 4117 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4118 type of communicator, use the construction mechanism: 4119 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4120 4121 By default, this format uses inodes (identical nodes) when possible. 4122 We search for consecutive rows with the same nonzero structure, thereby 4123 reusing matrix information to achieve increased efficiency. 4124 4125 Options Database Keys: 4126 + -mat_no_inode - Do not use inodes 4127 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4128 - -mat_aij_oneindex - Internally use indexing starting at 1 4129 rather than 0. Note that when calling MatSetValues(), 4130 the user still MUST index entries starting at 0! 4131 4132 4133 Example usage: 4134 4135 Consider the following 8x8 matrix with 34 non-zero values, that is 4136 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4137 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store 4189 34 values. 4190 4191 When d_nnz, o_nnz parameters are specified, the storage is specified 4192 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4193 In the above case the values for d_nnz,o_nnz are: 4194 .vb 4195 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4196 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4197 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4198 .ve 4199 Here the space allocated is sum of all the above values i.e 34, and 4200 hence pre-allocation is perfect. 4201 4202 Level: intermediate 4203 4204 .keywords: matrix, aij, compressed row, sparse, parallel 4205 4206 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4207 MPIAIJ, MatCreateMPIAIJWithArrays() 4208 @*/ 4209 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4210 { 4211 PetscErrorCode ierr; 4212 PetscMPIInt size; 4213 4214 PetscFunctionBegin; 4215 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4216 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4217 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4218 if (size > 1) { 4219 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4220 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4221 } else { 4222 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4223 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4224 } 4225 PetscFunctionReturn(0); 4226 } 4227 4228 #undef __FUNCT__ 4229 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4230 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4231 { 4232 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4233 4234 PetscFunctionBegin; 4235 if (Ad) *Ad = a->A; 4236 if (Ao) *Ao = a->B; 4237 if (colmap) *colmap = a->garray; 4238 PetscFunctionReturn(0); 4239 } 4240 4241 #undef __FUNCT__ 4242 #define __FUNCT__ 
"MatSetColoring_MPIAIJ" 4243 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4244 { 4245 PetscErrorCode ierr; 4246 PetscInt i; 4247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4248 4249 PetscFunctionBegin; 4250 if (coloring->ctype == IS_COLORING_GLOBAL) { 4251 ISColoringValue *allcolors,*colors; 4252 ISColoring ocoloring; 4253 4254 /* set coloring for diagonal portion */ 4255 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4256 4257 /* set coloring for off-diagonal portion */ 4258 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4259 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4260 for (i=0; i<a->B->cmap->n; i++) { 4261 colors[i] = allcolors[a->garray[i]]; 4262 } 4263 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4264 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4265 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4266 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4267 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4268 ISColoringValue *colors; 4269 PetscInt *larray; 4270 ISColoring ocoloring; 4271 4272 /* set coloring for diagonal portion */ 4273 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4274 for (i=0; i<a->A->cmap->n; i++) { 4275 larray[i] = i + A->cmap->rstart; 4276 } 4277 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4278 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4279 for (i=0; i<a->A->cmap->n; i++) { 4280 colors[i] = coloring->colors[larray[i]]; 4281 } 4282 ierr = PetscFree(larray);CHKERRQ(ierr); 4283 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4284 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4285 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4286 4287 /* set coloring for off-diagonal portion */ 
4288 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4289 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4290 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4291 for (i=0; i<a->B->cmap->n; i++) { 4292 colors[i] = coloring->colors[larray[i]]; 4293 } 4294 ierr = PetscFree(larray);CHKERRQ(ierr); 4295 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4296 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4297 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4298 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4299 PetscFunctionReturn(0); 4300 } 4301 4302 #undef __FUNCT__ 4303 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4304 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4305 { 4306 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4307 PetscErrorCode ierr; 4308 4309 PetscFunctionBegin; 4310 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4311 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4312 PetscFunctionReturn(0); 4313 } 4314 4315 #undef __FUNCT__ 4316 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4317 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4318 { 4319 PetscErrorCode ierr; 4320 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4321 PetscInt *indx; 4322 4323 PetscFunctionBegin; 4324 /* This routine will ONLY return MPIAIJ type matrix */ 4325 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4326 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4327 if (n == PETSC_DECIDE) { 4328 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4329 } 4330 /* Check sum(n) = N */ 4331 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4332 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != 
global columns %d",N); 4333 4334 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4335 rstart -= m; 4336 4337 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4338 for (i=0; i<m; i++) { 4339 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4340 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4341 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4342 } 4343 4344 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4345 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4346 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4347 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4348 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4349 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4350 PetscFunctionReturn(0); 4351 } 4352 4353 #undef __FUNCT__ 4354 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4355 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4356 { 4357 PetscErrorCode ierr; 4358 PetscInt m,N,i,rstart,nnz,Ii; 4359 PetscInt *indx; 4360 PetscScalar *values; 4361 4362 PetscFunctionBegin; 4363 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4364 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4365 for (i=0; i<m; i++) { 4366 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4367 Ii = i + rstart; 4368 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4369 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4370 } 4371 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4372 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4373 PetscFunctionReturn(0); 4374 } 4375 4376 #undef __FUNCT__ 4377 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4378 /*@ 4379 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4380 
matrices from each processor 4381 4382 Collective on MPI_Comm 4383 4384 Input Parameters: 4385 + comm - the communicators the parallel matrix will live on 4386 . inmat - the input sequential matrices 4387 . n - number of local columns (or PETSC_DECIDE) 4388 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4389 4390 Output Parameter: 4391 . outmat - the parallel matrix generated 4392 4393 Level: advanced 4394 4395 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4396 4397 @*/ 4398 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4399 { 4400 PetscErrorCode ierr; 4401 PetscMPIInt size; 4402 4403 PetscFunctionBegin; 4404 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4405 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4406 if (size == 1) { 4407 if (scall == MAT_INITIAL_MATRIX) { 4408 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4409 } else { 4410 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4411 } 4412 } else { 4413 if (scall == MAT_INITIAL_MATRIX) { 4414 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4415 } 4416 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4417 } 4418 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4419 PetscFunctionReturn(0); 4420 } 4421 4422 #undef __FUNCT__ 4423 #define __FUNCT__ "MatFileSplit" 4424 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4425 { 4426 PetscErrorCode ierr; 4427 PetscMPIInt rank; 4428 PetscInt m,N,i,rstart,nnz; 4429 size_t len; 4430 const PetscInt *indx; 4431 PetscViewer out; 4432 char *name; 4433 Mat B; 4434 const PetscScalar *values; 4435 4436 PetscFunctionBegin; 4437 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4438 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4439 /* Should this be the type of the diagonal block of A? 
*/ 4440 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4441 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4442 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4443 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4444 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4445 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4446 for (i=0; i<m; i++) { 4447 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4448 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4449 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4450 } 4451 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4452 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4453 4454 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4455 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4456 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4457 sprintf(name,"%s.%d",outfile,rank); 4458 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4459 ierr = PetscFree(name);CHKERRQ(ierr); 4460 ierr = MatView(B,out);CHKERRQ(ierr); 4461 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4462 ierr = MatDestroy(&B);CHKERRQ(ierr); 4463 PetscFunctionReturn(0); 4464 } 4465 4466 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4467 #undef __FUNCT__ 4468 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4469 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4470 { 4471 PetscErrorCode ierr; 4472 Mat_Merge_SeqsToMPI *merge; 4473 PetscContainer container; 4474 4475 PetscFunctionBegin; 4476 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4477 if (container) { 4478 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4479 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4480 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4481 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4482 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4483 ierr = 
PetscFree(merge->bj);CHKERRQ(ierr); 4484 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4485 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4486 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4487 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4488 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4489 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4490 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4491 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4492 ierr = PetscFree(merge);CHKERRQ(ierr); 4493 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4494 } 4495 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4496 PetscFunctionReturn(0); 4497 } 4498 4499 #include <../src/mat/utils/freespace.h> 4500 #include <petscbt.h> 4501 4502 #undef __FUNCT__ 4503 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4504 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4505 { 4506 PetscErrorCode ierr; 4507 MPI_Comm comm; 4508 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4509 PetscMPIInt size,rank,taga,*len_s; 4510 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4511 PetscInt proc,m; 4512 PetscInt **buf_ri,**buf_rj; 4513 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4514 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4515 MPI_Request *s_waits,*r_waits; 4516 MPI_Status *status; 4517 MatScalar *aa=a->a; 4518 MatScalar **abuf_r,*ba_i; 4519 Mat_Merge_SeqsToMPI *merge; 4520 PetscContainer container; 4521 4522 PetscFunctionBegin; 4523 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4524 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4525 4526 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4527 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4528 4529 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4530 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4531 4532 bi = merge->bi; 4533 bj = merge->bj; 4534 buf_ri 
= merge->buf_ri; 4535 buf_rj = merge->buf_rj; 4536 4537 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4538 owners = merge->rowmap->range; 4539 len_s = merge->len_s; 4540 4541 /* send and recv matrix values */ 4542 /*-----------------------------*/ 4543 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4544 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4545 4546 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4547 for (proc=0,k=0; proc<size; proc++) { 4548 if (!len_s[proc]) continue; 4549 i = owners[proc]; 4550 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4551 k++; 4552 } 4553 4554 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4555 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4556 ierr = PetscFree(status);CHKERRQ(ierr); 4557 4558 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4559 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4560 4561 /* insert mat values of mpimat */ 4562 /*----------------------------*/ 4563 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4564 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4565 4566 for (k=0; k<merge->nrecv; k++) { 4567 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4568 nrows = *(buf_ri_k[k]); 4569 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4570 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4571 } 4572 4573 /* set values of ba */ 4574 m = merge->rowmap->n; 4575 for (i=0; i<m; i++) { 4576 arow = owners[rank] + i; 4577 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4578 bnzi = bi[i+1] - bi[i]; 4579 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4580 4581 /* add local non-zero vals of this proc's seqmat into ba */ 4582 anzi = 
ai[arow+1] - ai[arow]; 4583 aj = a->j + ai[arow]; 4584 aa = a->a + ai[arow]; 4585 nextaj = 0; 4586 for (j=0; nextaj<anzi; j++) { 4587 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4588 ba_i[j] += aa[nextaj++]; 4589 } 4590 } 4591 4592 /* add received vals into ba */ 4593 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4594 /* i-th row */ 4595 if (i == *nextrow[k]) { 4596 anzi = *(nextai[k]+1) - *nextai[k]; 4597 aj = buf_rj[k] + *(nextai[k]); 4598 aa = abuf_r[k] + *(nextai[k]); 4599 nextaj = 0; 4600 for (j=0; nextaj<anzi; j++) { 4601 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4602 ba_i[j] += aa[nextaj++]; 4603 } 4604 } 4605 nextrow[k]++; nextai[k]++; 4606 } 4607 } 4608 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4609 } 4610 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4611 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4612 4613 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4614 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4615 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4616 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4617 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4618 PetscFunctionReturn(0); 4619 } 4620 4621 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4622 4623 #undef __FUNCT__ 4624 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4625 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4626 { 4627 PetscErrorCode ierr; 4628 Mat B_mpi; 4629 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4630 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4631 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4632 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4633 PetscInt len,proc,*dnz,*onz,bs,cbs; 4634 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4635 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4636 MPI_Request 
*si_waits,*sj_waits,*ri_waits,*rj_waits; 4637 MPI_Status *status; 4638 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4639 PetscBT lnkbt; 4640 Mat_Merge_SeqsToMPI *merge; 4641 PetscContainer container; 4642 4643 PetscFunctionBegin; 4644 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4645 4646 /* make sure it is a PETSc comm */ 4647 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4648 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4649 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4650 4651 ierr = PetscNew(&merge);CHKERRQ(ierr); 4652 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4653 4654 /* determine row ownership */ 4655 /*---------------------------------------------------------*/ 4656 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4657 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4658 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4659 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4660 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4661 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4662 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4663 4664 m = merge->rowmap->n; 4665 owners = merge->rowmap->range; 4666 4667 /* determine the number of messages to send, their lengths */ 4668 /*---------------------------------------------------------*/ 4669 len_s = merge->len_s; 4670 4671 len = 0; /* length of buf_si[] */ 4672 merge->nsend = 0; 4673 for (proc=0; proc<size; proc++) { 4674 len_si[proc] = 0; 4675 if (proc == rank) { 4676 len_s[proc] = 0; 4677 } else { 4678 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4679 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4680 } 4681 if (len_s[proc]) { 4682 merge->nsend++; 4683 nrows = 0; 4684 for (i=owners[proc]; i<owners[proc+1]; i++) { 4685 if (ai[i+1] > ai[i]) nrows++; 4686 } 4687 len_si[proc] = 2*(nrows+1); 4688 len += len_si[proc]; 4689 } 4690 } 4691 4692 
/* determine the number and length of messages to receive for ij-structure */ 4693 /*-------------------------------------------------------------------------*/ 4694 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4695 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4696 4697 /* post the Irecv of j-structure */ 4698 /*-------------------------------*/ 4699 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4700 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4701 4702 /* post the Isend of j-structure */ 4703 /*--------------------------------*/ 4704 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4705 4706 for (proc=0, k=0; proc<size; proc++) { 4707 if (!len_s[proc]) continue; 4708 i = owners[proc]; 4709 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4710 k++; 4711 } 4712 4713 /* receives and sends of j-structure are complete */ 4714 /*------------------------------------------------*/ 4715 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4716 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4717 4718 /* send and recv i-structure */ 4719 /*---------------------------*/ 4720 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4721 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4722 4723 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4724 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4725 for (proc=0,k=0; proc<size; proc++) { 4726 if (!len_s[proc]) continue; 4727 /* form outgoing message for i-structure: 4728 buf_si[0]: nrows to be sent 4729 [1:nrows]: row index (global) 4730 [nrows+1:2*nrows+1]: i-structure index 4731 */ 4732 
/*-------------------------------------------*/ 4733 nrows = len_si[proc]/2 - 1; 4734 buf_si_i = buf_si + nrows+1; 4735 buf_si[0] = nrows; 4736 buf_si_i[0] = 0; 4737 nrows = 0; 4738 for (i=owners[proc]; i<owners[proc+1]; i++) { 4739 anzi = ai[i+1] - ai[i]; 4740 if (anzi) { 4741 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4742 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4743 nrows++; 4744 } 4745 } 4746 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4747 k++; 4748 buf_si += len_si[proc]; 4749 } 4750 4751 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4752 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4753 4754 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4755 for (i=0; i<merge->nrecv; i++) { 4756 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4757 } 4758 4759 ierr = PetscFree(len_si);CHKERRQ(ierr); 4760 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4761 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4762 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4763 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4764 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4765 ierr = PetscFree(status);CHKERRQ(ierr); 4766 4767 /* compute a local seq matrix in each processor */ 4768 /*----------------------------------------------*/ 4769 /* allocate bi array and free space for accumulating nonzero column info */ 4770 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4771 bi[0] = 0; 4772 4773 /* create and initialize a linked list */ 4774 nlnk = N+1; 4775 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4776 4777 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4778 len = ai[owners[rank+1]] - ai[owners[rank]]; 4779 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4780 4781 current_space = free_space; 4782 
4783 /* determine symbolic info for each local row */ 4784 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4785 4786 for (k=0; k<merge->nrecv; k++) { 4787 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4788 nrows = *buf_ri_k[k]; 4789 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4790 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4791 } 4792 4793 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4794 len = 0; 4795 for (i=0; i<m; i++) { 4796 bnzi = 0; 4797 /* add local non-zero cols of this proc's seqmat into lnk */ 4798 arow = owners[rank] + i; 4799 anzi = ai[arow+1] - ai[arow]; 4800 aj = a->j + ai[arow]; 4801 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4802 bnzi += nlnk; 4803 /* add received col data into lnk */ 4804 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4805 if (i == *nextrow[k]) { /* i-th row */ 4806 anzi = *(nextai[k]+1) - *nextai[k]; 4807 aj = buf_rj[k] + *nextai[k]; 4808 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4809 bnzi += nlnk; 4810 nextrow[k]++; nextai[k]++; 4811 } 4812 } 4813 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4814 4815 /* if free space is not available, make more free space */ 4816 if (current_space->local_remaining<bnzi) { 4817 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4818 nspacedouble++; 4819 } 4820 /* copy data into free space, then initialize lnk */ 4821 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4822 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4823 4824 current_space->array += bnzi; 4825 current_space->local_used += bnzi; 4826 current_space->local_remaining -= bnzi; 4827 4828 bi[i+1] = bi[i] + bnzi; 4829 } 4830 4831 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 
4832 4833 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4834 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4835 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4836 4837 /* create symbolic parallel matrix B_mpi */ 4838 /*---------------------------------------*/ 4839 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4840 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4841 if (n==PETSC_DECIDE) { 4842 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4843 } else { 4844 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4845 } 4846 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4847 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4848 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4849 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4850 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4851 4852 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4853 B_mpi->assembled = PETSC_FALSE; 4854 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4855 merge->bi = bi; 4856 merge->bj = bj; 4857 merge->buf_ri = buf_ri; 4858 merge->buf_rj = buf_rj; 4859 merge->coi = NULL; 4860 merge->coj = NULL; 4861 merge->owners_co = NULL; 4862 4863 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4864 4865 /* attach the supporting struct to B_mpi for reuse */ 4866 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4867 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4868 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4869 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4870 *mpimat = B_mpi; 4871 4872 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4873 PetscFunctionReturn(0); 4874 } 4875 4876 #undef __FUNCT__ 4877 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4878 /*@C 4879 MatCreateMPIAIJSumSeqAIJ - 
Creates a MPIAIJ matrix by adding sequential 4880 matrices from each processor 4881 4882 Collective on MPI_Comm 4883 4884 Input Parameters: 4885 + comm - the communicators the parallel matrix will live on 4886 . seqmat - the input sequential matrices 4887 . m - number of local rows (or PETSC_DECIDE) 4888 . n - number of local columns (or PETSC_DECIDE) 4889 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4890 4891 Output Parameter: 4892 . mpimat - the parallel matrix generated 4893 4894 Level: advanced 4895 4896 Notes: 4897 The dimensions of the sequential matrix in each processor MUST be the same. 4898 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4899 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4900 @*/ 4901 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4902 { 4903 PetscErrorCode ierr; 4904 PetscMPIInt size; 4905 4906 PetscFunctionBegin; 4907 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4908 if (size == 1) { 4909 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4910 if (scall == MAT_INITIAL_MATRIX) { 4911 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4912 } else { 4913 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4914 } 4915 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4916 PetscFunctionReturn(0); 4917 } 4918 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4919 if (scall == MAT_INITIAL_MATRIX) { 4920 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4921 } 4922 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4923 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4924 PetscFunctionReturn(0); 4925 } 4926 4927 #undef __FUNCT__ 4928 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4929 /*@ 4930 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by 
taking all its local rows and putting them into a sequential vector with 4931 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4932 with MatGetSize() 4933 4934 Not Collective 4935 4936 Input Parameters: 4937 + A - the matrix 4938 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4939 4940 Output Parameter: 4941 . A_loc - the local sequential matrix generated 4942 4943 Level: developer 4944 4945 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4946 4947 @*/ 4948 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4949 { 4950 PetscErrorCode ierr; 4951 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4952 Mat_SeqAIJ *mat,*a,*b; 4953 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4954 MatScalar *aa,*ba,*cam; 4955 PetscScalar *ca; 4956 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4957 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4958 PetscBool match; 4959 MPI_Comm comm; 4960 PetscMPIInt size; 4961 4962 PetscFunctionBegin; 4963 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4964 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4965 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4966 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4967 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4968 4969 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4970 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4971 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4972 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4973 aa = a->a; ba = b->a; 4974 if (scall == MAT_INITIAL_MATRIX) { 4975 if (size == 1) { 4976 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4977 PetscFunctionReturn(0); 4978 } 4979 4980 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4981 ci[0] = 0; 4982 for (i=0; i<am; i++) { 4983 ci[i+1] = ci[i] + (ai[i+1] - 
ai[i]) + (bi[i+1] - bi[i]); 4984 } 4985 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4986 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4987 k = 0; 4988 for (i=0; i<am; i++) { 4989 ncols_o = bi[i+1] - bi[i]; 4990 ncols_d = ai[i+1] - ai[i]; 4991 /* off-diagonal portion of A */ 4992 for (jo=0; jo<ncols_o; jo++) { 4993 col = cmap[*bj]; 4994 if (col >= cstart) break; 4995 cj[k] = col; bj++; 4996 ca[k++] = *ba++; 4997 } 4998 /* diagonal portion of A */ 4999 for (j=0; j<ncols_d; j++) { 5000 cj[k] = cstart + *aj++; 5001 ca[k++] = *aa++; 5002 } 5003 /* off-diagonal portion of A */ 5004 for (j=jo; j<ncols_o; j++) { 5005 cj[k] = cmap[*bj++]; 5006 ca[k++] = *ba++; 5007 } 5008 } 5009 /* put together the new matrix */ 5010 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5011 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5012 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5013 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5014 mat->free_a = PETSC_TRUE; 5015 mat->free_ij = PETSC_TRUE; 5016 mat->nonew = 0; 5017 } else if (scall == MAT_REUSE_MATRIX) { 5018 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5019 ci = mat->i; cj = mat->j; cam = mat->a; 5020 for (i=0; i<am; i++) { 5021 /* off-diagonal portion of A */ 5022 ncols_o = bi[i+1] - bi[i]; 5023 for (jo=0; jo<ncols_o; jo++) { 5024 col = cmap[*bj]; 5025 if (col >= cstart) break; 5026 *cam++ = *ba++; bj++; 5027 } 5028 /* diagonal portion of A */ 5029 ncols_d = ai[i+1] - ai[i]; 5030 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5031 /* off-diagonal portion of A */ 5032 for (j=jo; j<ncols_o; j++) { 5033 *cam++ = *ba++; bj++; 5034 } 5035 } 5036 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5037 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5038 PetscFunctionReturn(0); 5039 } 5040 5041 #undef __FUNCT__ 5042 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5043 /*@C 5044 
MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5045 5046 Not Collective 5047 5048 Input Parameters: 5049 + A - the matrix 5050 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5051 - row, col - index sets of rows and columns to extract (or NULL) 5052 5053 Output Parameter: 5054 . A_loc - the local sequential matrix generated 5055 5056 Level: developer 5057 5058 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5059 5060 @*/ 5061 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5062 { 5063 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5064 PetscErrorCode ierr; 5065 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5066 IS isrowa,iscola; 5067 Mat *aloc; 5068 PetscBool match; 5069 5070 PetscFunctionBegin; 5071 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5072 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5073 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5074 if (!row) { 5075 start = A->rmap->rstart; end = A->rmap->rend; 5076 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5077 } else { 5078 isrowa = *row; 5079 } 5080 if (!col) { 5081 start = A->cmap->rstart; 5082 cmap = a->garray; 5083 nzA = a->A->cmap->n; 5084 nzB = a->B->cmap->n; 5085 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5086 ncols = 0; 5087 for (i=0; i<nzB; i++) { 5088 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5089 else break; 5090 } 5091 imark = i; 5092 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5093 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5094 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5095 } else { 5096 iscola = *col; 5097 } 5098 if (scall != MAT_INITIAL_MATRIX) { 5099 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5100 aloc[0] = *A_loc; 5101 } 
5102 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5103 *A_loc = aloc[0]; 5104 ierr = PetscFree(aloc);CHKERRQ(ierr); 5105 if (!row) { 5106 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5107 } 5108 if (!col) { 5109 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5110 } 5111 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5112 PetscFunctionReturn(0); 5113 } 5114 5115 #undef __FUNCT__ 5116 #define __FUNCT__ "MatGetBrowsOfAcols" 5117 /*@C 5118 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5119 5120 Collective on Mat 5121 5122 Input Parameters: 5123 + A,B - the matrices in mpiaij format 5124 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5125 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5126 5127 Output Parameter: 5128 + rowb, colb - index sets of rows and columns of B to extract 5129 - B_seq - the sequential matrix generated 5130 5131 Level: developer 5132 5133 @*/ 5134 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5135 { 5136 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5137 PetscErrorCode ierr; 5138 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5139 IS isrowb,iscolb; 5140 Mat *bseq=NULL; 5141 5142 PetscFunctionBegin; 5143 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5144 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5145 } 5146 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5147 5148 if (scall == MAT_INITIAL_MATRIX) { 5149 start = A->cmap->rstart; 5150 cmap = a->garray; 5151 nzA = a->A->cmap->n; 5152 nzB = a->B->cmap->n; 5153 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5154 ncols = 0; 5155 for (i=0; i<nzB; i++) { /* row < local row index */ 5156 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5157 else break; 5158 } 
5159 imark = i; 5160 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5161 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5162 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5163 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5164 } else { 5165 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5166 isrowb = *rowb; iscolb = *colb; 5167 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5168 bseq[0] = *B_seq; 5169 } 5170 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5171 *B_seq = bseq[0]; 5172 ierr = PetscFree(bseq);CHKERRQ(ierr); 5173 if (!rowb) { 5174 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5175 } else { 5176 *rowb = isrowb; 5177 } 5178 if (!colb) { 5179 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5180 } else { 5181 *colb = iscolb; 5182 } 5183 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5184 PetscFunctionReturn(0); 5185 } 5186 5187 #undef __FUNCT__ 5188 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5189 /* 5190 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5191 of the OFF-DIAGONAL portion of local A 5192 5193 Collective on Mat 5194 5195 Input Parameters: 5196 + A,B - the matrices in mpiaij format 5197 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5198 5199 Output Parameter: 5200 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5201 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5202 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5203 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5204 5205 Level: developer 5206 5207 */ 5208 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5209 { 5210 VecScatter_MPI_General *gen_to,*gen_from; 5211 PetscErrorCode ierr; 5212 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5213 Mat_SeqAIJ *b_oth; 5214 VecScatter ctx =a->Mvctx; 5215 MPI_Comm comm; 5216 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5217 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5218 PetscScalar *rvalues,*svalues; 5219 MatScalar *b_otha,*bufa,*bufA; 5220 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5221 MPI_Request *rwaits = NULL,*swaits = NULL; 5222 MPI_Status *sstatus,rstatus; 5223 PetscMPIInt jj,size; 5224 PetscInt *cols,sbs,rbs; 5225 PetscScalar *vals; 5226 5227 PetscFunctionBegin; 5228 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5229 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5230 if (size == 1) PetscFunctionReturn(0); 5231 5232 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5233 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5234 } 5235 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5236 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5237 5238 gen_to = (VecScatter_MPI_General*)ctx->todata; 5239 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5240 rvalues = gen_from->values; /* holds the length of receiving row */ 5241 svalues = gen_to->values; /* holds the length of sending row */ 5242 nrecvs = gen_from->n; 5243 nsends = gen_to->n; 5244 5245 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 
5246 srow = gen_to->indices; /* local row index to be sent */ 5247 sstarts = gen_to->starts; 5248 sprocs = gen_to->procs; 5249 sstatus = gen_to->sstatus; 5250 sbs = gen_to->bs; 5251 rstarts = gen_from->starts; 5252 rprocs = gen_from->procs; 5253 rbs = gen_from->bs; 5254 5255 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5256 if (scall == MAT_INITIAL_MATRIX) { 5257 /* i-array */ 5258 /*---------*/ 5259 /* post receives */ 5260 for (i=0; i<nrecvs; i++) { 5261 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5262 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5263 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5264 } 5265 5266 /* pack the outgoing message */ 5267 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5268 5269 sstartsj[0] = 0; 5270 rstartsj[0] = 0; 5271 len = 0; /* total length of j or a array to be sent */ 5272 k = 0; 5273 for (i=0; i<nsends; i++) { 5274 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5275 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5276 for (j=0; j<nrows; j++) { 5277 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5278 for (l=0; l<sbs; l++) { 5279 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5280 5281 rowlen[j*sbs+l] = ncols; 5282 5283 len += ncols; 5284 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5285 } 5286 k++; 5287 } 5288 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5289 5290 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5291 } 5292 /* recvs and sends of i-array are completed */ 5293 i = nrecvs; 5294 while (i--) { 5295 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5296 } 5297 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5298 5299 /* allocate buffers for sending j and a arrays */ 5300 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5301 
ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5302 5303 /* create i-array of B_oth */ 5304 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5305 5306 b_othi[0] = 0; 5307 len = 0; /* total length of j or a array to be received */ 5308 k = 0; 5309 for (i=0; i<nrecvs; i++) { 5310 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5311 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5312 for (j=0; j<nrows; j++) { 5313 b_othi[k+1] = b_othi[k] + rowlen[j]; 5314 len += rowlen[j]; k++; 5315 } 5316 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5317 } 5318 5319 /* allocate space for j and a arrrays of B_oth */ 5320 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5321 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5322 5323 /* j-array */ 5324 /*---------*/ 5325 /* post receives of j-array */ 5326 for (i=0; i<nrecvs; i++) { 5327 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5328 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5329 } 5330 5331 /* pack the outgoing message j-array */ 5332 k = 0; 5333 for (i=0; i<nsends; i++) { 5334 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5335 bufJ = bufj+sstartsj[i]; 5336 for (j=0; j<nrows; j++) { 5337 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5338 for (ll=0; ll<sbs; ll++) { 5339 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5340 for (l=0; l<ncols; l++) { 5341 *bufJ++ = cols[l]; 5342 } 5343 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5344 } 5345 } 5346 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5347 } 5348 5349 /* recvs and sends of j-array are completed */ 5350 i = nrecvs; 5351 while (i--) { 5352 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5353 } 5354 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 
5355 } else if (scall == MAT_REUSE_MATRIX) { 5356 sstartsj = *startsj_s; 5357 rstartsj = *startsj_r; 5358 bufa = *bufa_ptr; 5359 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5360 b_otha = b_oth->a; 5361 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5362 5363 /* a-array */ 5364 /*---------*/ 5365 /* post receives of a-array */ 5366 for (i=0; i<nrecvs; i++) { 5367 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5368 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5369 } 5370 5371 /* pack the outgoing message a-array */ 5372 k = 0; 5373 for (i=0; i<nsends; i++) { 5374 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5375 bufA = bufa+sstartsj[i]; 5376 for (j=0; j<nrows; j++) { 5377 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5378 for (ll=0; ll<sbs; ll++) { 5379 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5380 for (l=0; l<ncols; l++) { 5381 *bufA++ = vals[l]; 5382 } 5383 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5384 } 5385 } 5386 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5387 } 5388 /* recvs and sends of a-array are completed */ 5389 i = nrecvs; 5390 while (i--) { 5391 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5392 } 5393 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5394 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5395 5396 if (scall == MAT_INITIAL_MATRIX) { 5397 /* put together the new matrix */ 5398 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5399 5400 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5401 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5402 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5403 b_oth->free_a = PETSC_TRUE; 5404 b_oth->free_ij = PETSC_TRUE; 5405 b_oth->nonew = 0; 5406 5407 ierr = PetscFree(bufj);CHKERRQ(ierr); 5408 if (!startsj_s || !bufa_ptr) { 5409 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5410 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5411 } else { 5412 *startsj_s = sstartsj; 5413 *startsj_r = rstartsj; 5414 *bufa_ptr = bufa; 5415 } 5416 } 5417 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5418 PetscFunctionReturn(0); 5419 } 5420 5421 #undef __FUNCT__ 5422 #define __FUNCT__ "MatGetCommunicationStructs" 5423 /*@C 5424 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5425 5426 Not Collective 5427 5428 Input Parameters: 5429 . A - The matrix in mpiaij format 5430 5431 Output Parameter: 5432 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5433 . colmap - A map from global column index to local index into lvec 5434 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5435 5436 Level: developer 5437 5438 @*/ 5439 #if defined(PETSC_USE_CTABLE) 5440 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5441 #else 5442 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5443 #endif 5444 { 5445 Mat_MPIAIJ *a; 5446 5447 PetscFunctionBegin; 5448 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5449 PetscValidPointer(lvec, 2); 5450 PetscValidPointer(colmap, 3); 5451 PetscValidPointer(multScatter, 4); 5452 a = (Mat_MPIAIJ*) A->data; 5453 if (lvec) *lvec = a->lvec; 5454 if (colmap) *colmap = a->colmap; 5455 if (multScatter) *multScatter = a->Mvctx; 5456 PetscFunctionReturn(0); 5457 } 5458 5459 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5460 PETSC_EXTERN PetscErrorCode 
MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5461 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5462 5463 #undef __FUNCT__ 5464 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5465 /* 5466 Computes (B'*A')' since computing B*A directly is untenable 5467 5468 n p p 5469 ( ) ( ) ( ) 5470 m ( A ) * n ( B ) = m ( C ) 5471 ( ) ( ) ( ) 5472 5473 */ 5474 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5475 { 5476 PetscErrorCode ierr; 5477 Mat At,Bt,Ct; 5478 5479 PetscFunctionBegin; 5480 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5481 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5482 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5483 ierr = MatDestroy(&At);CHKERRQ(ierr); 5484 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5485 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5486 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5487 PetscFunctionReturn(0); 5488 } 5489 5490 #undef __FUNCT__ 5491 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5492 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5493 { 5494 PetscErrorCode ierr; 5495 PetscInt m=A->rmap->n,n=B->cmap->n; 5496 Mat Cmat; 5497 5498 PetscFunctionBegin; 5499 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5500 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5501 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5502 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5503 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5504 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5505 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5506 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5507 5508 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5509 5510 *C = Cmat; 5511 
PetscFunctionReturn(0); 5512 } 5513 5514 /* ----------------------------------------------------------------*/ 5515 #undef __FUNCT__ 5516 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5517 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5518 { 5519 PetscErrorCode ierr; 5520 5521 PetscFunctionBegin; 5522 if (scall == MAT_INITIAL_MATRIX) { 5523 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5524 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5525 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5526 } 5527 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5528 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5529 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5530 PetscFunctionReturn(0); 5531 } 5532 5533 #if defined(PETSC_HAVE_MUMPS) 5534 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5535 #endif 5536 #if defined(PETSC_HAVE_PASTIX) 5537 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5538 #endif 5539 #if defined(PETSC_HAVE_SUPERLU_DIST) 5540 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5541 #endif 5542 #if defined(PETSC_HAVE_CLIQUE) 5543 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5544 #endif 5545 5546 /*MC 5547 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5548 5549 Options Database Keys: 5550 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5551 5552 Level: beginner 5553 5554 .seealso: MatCreateAIJ() 5555 M*/ 5556 5557 #undef __FUNCT__ 5558 #define __FUNCT__ "MatCreate_MPIAIJ" 5559 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5560 { 5561 Mat_MPIAIJ *b; 5562 PetscErrorCode ierr; 5563 PetscMPIInt size; 5564 5565 PetscFunctionBegin; 5566 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5567 5568 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5569 B->data = (void*)b; 5570 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5571 B->assembled = PETSC_FALSE; 5572 B->insertmode = NOT_SET_VALUES; 5573 b->size = size; 5574 5575 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5576 5577 /* build cache for off array entries formed */ 5578 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5579 5580 b->donotstash = PETSC_FALSE; 5581 b->colmap = 0; 5582 b->garray = 0; 5583 b->roworiented = PETSC_TRUE; 5584 5585 /* stuff used for matrix vector multiply */ 5586 b->lvec = NULL; 5587 b->Mvctx = NULL; 5588 5589 /* stuff for MatGetRow() */ 5590 b->rowindices = 0; 5591 b->rowvalues = 0; 5592 b->getrowactive = PETSC_FALSE; 5593 5594 /* flexible pointer used in CUSP/CUSPARSE classes */ 5595 b->spptr = NULL; 5596 5597 #if defined(PETSC_HAVE_MUMPS) 5598 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5599 #endif 5600 #if defined(PETSC_HAVE_PASTIX) 5601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5602 #endif 5603 #if defined(PETSC_HAVE_SUPERLU_DIST) 5604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5605 #endif 5606 #if defined(PETSC_HAVE_CLIQUE) 5607 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5608 #endif 5609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5614 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5615 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5616 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5617 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5618 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5619 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5620 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5621 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5622 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5623 PetscFunctionReturn(0); 5624 } 5625 5626 #undef __FUNCT__ 5627 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5628 /*@C 5629 
MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5630 and "off-diagonal" part of the matrix in CSR format. 5631 5632 Collective on MPI_Comm 5633 5634 Input Parameters: 5635 + comm - MPI communicator 5636 . m - number of local rows (Cannot be PETSC_DECIDE) 5637 . n - This value should be the same as the local size used in creating the 5638 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5639 calculated if N is given) For square matrices n is almost always m. 5640 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5641 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5642 . i - row indices for "diagonal" portion of matrix 5643 . j - column indices 5644 . a - matrix values 5645 . oi - row indices for "off-diagonal" portion of matrix 5646 . oj - column indices 5647 - oa - matrix values 5648 5649 Output Parameter: 5650 . mat - the matrix 5651 5652 Level: advanced 5653 5654 Notes: 5655 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5656 must free the arrays once the matrix has been destroyed and not before. 5657 5658 The i and j indices are 0 based 5659 5660 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5661 5662 This sets local rows and cannot be used to set off-processor values. 5663 5664 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5665 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5666 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5667 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5668 keep track of the underlying array. 
Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5669 communication if it is known that only local entries will be set. 5670 5671 .keywords: matrix, aij, compressed row, sparse, parallel 5672 5673 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5674 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5675 C@*/ 5676 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5677 { 5678 PetscErrorCode ierr; 5679 Mat_MPIAIJ *maij; 5680 5681 PetscFunctionBegin; 5682 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5683 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5684 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5685 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5686 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5687 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5688 maij = (Mat_MPIAIJ*) (*mat)->data; 5689 5690 (*mat)->preallocated = PETSC_TRUE; 5691 5692 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5693 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5694 5695 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5696 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5697 5698 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5699 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5700 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5701 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5702 5703 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5704 ierr = 
MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5705 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5706 PetscFunctionReturn(0); 5707 } 5708 5709 /* 5710 Special version for direct calls from Fortran 5711 */ 5712 #include <petsc-private/fortranimpl.h> 5713 5714 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5715 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5716 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5717 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5718 #endif 5719 5720 /* Change these macros so can be used in void function */ 5721 #undef CHKERRQ 5722 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5723 #undef SETERRQ2 5724 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5725 #undef SETERRQ3 5726 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5727 #undef SETERRQ 5728 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5729 5730 #undef __FUNCT__ 5731 #define __FUNCT__ "matsetvaluesmpiaij_" 5732 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5733 { 5734 Mat mat = *mmat; 5735 PetscInt m = *mm, n = *mn; 5736 InsertMode addv = *maddv; 5737 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5738 PetscScalar value; 5739 PetscErrorCode ierr; 5740 5741 MatCheckPreallocated(mat,1); 5742 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5743 5744 #if defined(PETSC_USE_DEBUG) 5745 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5746 #endif 5747 { 5748 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5749 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5750 PetscBool roworiented = aij->roworiented; 5751 5752 /* Some Variables required in the macro */ 5753 Mat A = aij->A; 5754 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5755 PetscInt *aimax = a->imax,*ai = 
a->i,*ailen = a->ilen,*aj = a->j; 5756 MatScalar *aa = a->a; 5757 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5758 Mat B = aij->B; 5759 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5760 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5761 MatScalar *ba = b->a; 5762 5763 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5764 PetscInt nonew = a->nonew; 5765 MatScalar *ap1,*ap2; 5766 5767 PetscFunctionBegin; 5768 for (i=0; i<m; i++) { 5769 if (im[i] < 0) continue; 5770 #if defined(PETSC_USE_DEBUG) 5771 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5772 #endif 5773 if (im[i] >= rstart && im[i] < rend) { 5774 row = im[i] - rstart; 5775 lastcol1 = -1; 5776 rp1 = aj + ai[row]; 5777 ap1 = aa + ai[row]; 5778 rmax1 = aimax[row]; 5779 nrow1 = ailen[row]; 5780 low1 = 0; 5781 high1 = nrow1; 5782 lastcol2 = -1; 5783 rp2 = bj + bi[row]; 5784 ap2 = ba + bi[row]; 5785 rmax2 = bimax[row]; 5786 nrow2 = bilen[row]; 5787 low2 = 0; 5788 high2 = nrow2; 5789 5790 for (j=0; j<n; j++) { 5791 if (roworiented) value = v[i*n+j]; 5792 else value = v[i+j*m]; 5793 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5794 if (in[j] >= cstart && in[j] < cend) { 5795 col = in[j] - cstart; 5796 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5797 } else if (in[j] < 0) continue; 5798 #if defined(PETSC_USE_DEBUG) 5799 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5800 #endif 5801 else { 5802 if (mat->was_assembled) { 5803 if (!aij->colmap) { 5804 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5805 } 5806 #if defined(PETSC_USE_CTABLE) 5807 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5808 col--; 5809 #else 5810 col = 
aij->colmap[in[j]] - 1; 5811 #endif 5812 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5813 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5814 col = in[j]; 5815 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5816 B = aij->B; 5817 b = (Mat_SeqAIJ*)B->data; 5818 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5819 rp2 = bj + bi[row]; 5820 ap2 = ba + bi[row]; 5821 rmax2 = bimax[row]; 5822 nrow2 = bilen[row]; 5823 low2 = 0; 5824 high2 = nrow2; 5825 bm = aij->B->rmap->n; 5826 ba = b->a; 5827 } 5828 } else col = in[j]; 5829 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5830 } 5831 } 5832 } else if (!aij->donotstash) { 5833 if (roworiented) { 5834 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5835 } else { 5836 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5837 } 5838 } 5839 } 5840 } 5841 PetscFunctionReturnVoid(); 5842 } 5843 5844