1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 138 if (type == NORM_2) { 139 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 140 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 141 } 142 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 143 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 144 } 145 } else if (type == NORM_1) { 146 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 147 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 148 } 149 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 150 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 151 } 152 } else if (type == NORM_INFINITY) { 153 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 154 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 155 } 156 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 157 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 158 } 159 160 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 161 if (type == NORM_INFINITY) { 162 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 163 } else { 164 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 165 } 166 ierr = PetscFree(work);CHKERRQ(ierr); 167 if (type == NORM_2) { 168 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ" 175 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 176 { 177 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 178 IS sis,gis; 179 PetscErrorCode ierr; 180 const PetscInt *isis,*igis; 181 PetscInt n,*iis,nsis,ngis,rstart,i; 182 183 PetscFunctionBegin; 184 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 185 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 186 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 187 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 188 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 189 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 190 191 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 192 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 193 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 194 n = ngis + nsis; 195 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 196 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 197 for (i=0; i<n; i++) iis[i] += rstart; 198 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 199 200 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 201 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 202 ierr = ISDestroy(&sis);CHKERRQ(ierr); 203 ierr = ISDestroy(&gis);CHKERRQ(ierr); 204 PetscFunctionReturn(0); 205 } 206 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatDistribute_MPIAIJ" 209 /* 210 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 211 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 212 213 Only for square matrices 214 215 Used by a preconditioner, hence PETSC_EXTERN 216 */ 217 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 218 { 219 PetscMPIInt rank,size; 220 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 221 PetscErrorCode ierr; 222 Mat mat; 223 Mat_SeqAIJ *gmata; 224 PetscMPIInt tag; 225 MPI_Status status; 226 PetscBool aij; 227 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 228 229 PetscFunctionBegin; 230 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 231 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 232 if (!rank) { 233 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 234 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 235 } 236 if (reuse == MAT_INITIAL_MATRIX) { 237 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 238 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 239 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 240 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 241 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 242 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 243 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 244 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 245 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 246 247 rowners[0] = 0; 248 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 249 rstart = rowners[rank]; 250 rend = rowners[rank+1]; 251 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 252 if (!rank) { 253 gmata = (Mat_SeqAIJ*) gmat->data; 254 /* send row lengths to all processors */ 255 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 256 for (i=1; i<size; i++) { 257 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 258 } 259 /* determine number diagonal and off-diagonal counts */ 260 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 261 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 262 jj = 0; 263 for (i=0; i<m; i++) { 264 for (j=0; j<dlens[i]; j++) { 265 if (gmata->j[jj] < rstart) ld[i]++; 266 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 267 jj++; 268 } 269 } 270 /* send column indices to other processes */ 271 for (i=1; i<size; i++) { 272 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 273 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 274 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 275 } 276 277 /* send numerical values to other processes */ 278 for (i=1; i<size; i++) { 279 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 280 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 281 } 282 gmataa = gmata->a; 283 gmataj = gmata->j; 284 285 } else { 286 /* receive row lengths */ 287 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 288 /* receive column indices */ 289 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 290 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 291 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 292 /* determine number diagonal and off-diagonal counts */ 293 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 294 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 295 jj = 0; 296 for (i=0; i<m; i++) { 297 for (j=0; j<dlens[i]; j++) { 298 if (gmataj[jj] < rstart) ld[i]++; 299 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 300 jj++; 301 } 302 } 303 /* receive numerical values */ 304 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 305 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 306 } 307 /* set preallocation */ 308 for (i=0; i<m; i++) { 309 dlens[i] -= olens[i]; 310 } 311 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 312 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 313 314 for (i=0; i<m; i++) { 315 dlens[i] += olens[i]; 316 } 317 cnt = 0; 318 for (i=0; i<m; i++) { 319 row = rstart + i; 320 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 321 cnt += dlens[i]; 322 } 323 if (rank) { 324 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 325 } 326 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 327 ierr = PetscFree(rowners);CHKERRQ(ierr); 328 329 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 330 331 *inmat = mat; 332 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 333 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 334 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 335 mat = *inmat; 336 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 337 if (!rank) { 338 /* send numerical values to other processes */ 339 gmata = (Mat_SeqAIJ*) gmat->data; 340 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 341 gmataa = gmata->a; 342 for (i=1; i<size; i++) { 343 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 344 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 345 } 346 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 347 } else { 348 /* receive numerical values from process 0*/ 349 nz = Ad->nz + Ao->nz; 350 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 351 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 352 } 353 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 354 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 355 ad = Ad->a; 356 ao = Ao->a; 357 if (mat->rmap->n) { 358 i = 0; 359 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 360 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 361 } 362 for (i=1; i<mat->rmap->n; i++) { 363 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 364 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 365 } 366 i--; 367 if (mat->rmap->n) { 368 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 369 } 370 if (rank) { 371 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 372 } 373 } 374 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 375 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 376 PetscFunctionReturn(0); 377 } 378 379 /* 380 Local utility routine that creates a mapping from the global column 381 number to the local number in the off-diagonal part of the local 382 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 383 a slightly higher hash table cost; without it it is not scalable (each processor 384 has an order N integer array but is fast to acess. 385 */ 386 #undef __FUNCT__ 387 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 388 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 389 { 390 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 391 PetscErrorCode ierr; 392 PetscInt n = aij->B->cmap->n,i; 393 394 PetscFunctionBegin; 395 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 396 #if defined(PETSC_USE_CTABLE) 397 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 398 for (i=0; i<n; i++) { 399 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 400 } 401 #else 402 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 403 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 404 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 405 #endif 406 PetscFunctionReturn(0); 407 } 408 409 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 410 { \ 411 if (col <= lastcol1) low1 = 0; \ 412 else high1 = nrow1; \ 413 lastcol1 = col;\ 414 while (high1-low1 > 5) { \ 415 t = (low1+high1)/2; \ 416 if (rp1[t] > col) high1 = t; \ 417 else low1 = t; \ 418 } \ 419 for (_i=low1; _i<high1; _i++) { \ 420 if (rp1[_i] > col) break; \ 421 if (rp1[_i] == col) { \ 422 if (addv == ADD_VALUES) ap1[_i] += value; \ 423 else ap1[_i] = value; \ 424 goto a_noinsert; \ 425 } \ 426 } \ 427 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 428 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 429 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 430 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 431 N = nrow1++ - 1; a->nz++; high1++; \ 432 /* shift up all the later entries in this row */ \ 433 for (ii=N; ii>=_i; ii--) { \ 434 rp1[ii+1] = rp1[ii]; \ 435 ap1[ii+1] = ap1[ii]; \ 436 } \ 437 rp1[_i] = col; \ 438 ap1[_i] = value; \ 439 A->nonzerostate++;\ 440 a_noinsert: ; \ 441 ailen[row] = nrow1; \ 442 } 443 444 445 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 446 { \ 447 if (col <= lastcol2) low2 = 0; \ 448 else high2 = nrow2; \ 449 lastcol2 = col; \ 450 while (high2-low2 > 5) { \ 451 t = (low2+high2)/2; \ 452 if (rp2[t] > col) high2 = t; \ 453 else low2 = t; \ 454 } \ 455 for (_i=low2; _i<high2; _i++) { \ 456 if (rp2[_i] > col) break; \ 457 if (rp2[_i] == col) { \ 458 if (addv == ADD_VALUES) ap2[_i] += value; \ 459 else ap2[_i] = value; \ 460 goto b_noinsert; \ 461 } \ 462 } \ 463 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 464 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 465 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 466 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 467 N = nrow2++ - 1; b->nz++; high2++; \ 468 /* shift up all the later entries in this row */ \ 469 for (ii=N; ii>=_i; ii--) { \ 470 rp2[ii+1] = rp2[ii]; \ 471 ap2[ii+1] = ap2[ii]; \ 472 } \ 473 rp2[_i] = col; \ 474 ap2[_i] = value; \ 475 B->nonzerostate++; \ 476 b_noinsert: ; \ 477 bilen[row] = nrow2; \ 478 } 479 480 #undef __FUNCT__ 481 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 482 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 483 { 484 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 485 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 486 PetscErrorCode ierr; 487 PetscInt l,*garray = mat->garray,diag; 488 489 PetscFunctionBegin; 490 /* code only works for square matrices A */ 491 492 /* find size of row to the left of the diagonal part */ 493 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 494 row = row - diag; 495 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 496 if (garray[b->j[b->i[row]+l]] > diag) break; 497 } 498 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 499 500 /* diagonal part */ 501 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 502 503 /* right of diagonal part */ 504 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 505 PetscFunctionReturn(0); 506 } 507 508 #undef __FUNCT__ 509 #define __FUNCT__ "MatSetValues_MPIAIJ" 510 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 511 { 512 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 513 PetscScalar value; 514 PetscErrorCode ierr; 515 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 516 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 517 PetscBool roworiented = aij->roworiented; 518 519 /* Some Variables required in the macro */ 520 Mat A = aij->A; 521 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 522 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 523 MatScalar *aa = a->a; 524 PetscBool ignorezeroentries = a->ignorezeroentries; 525 Mat B = aij->B; 526 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 527 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 528 MatScalar *ba = b->a; 529 530 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 531 PetscInt nonew; 532 MatScalar *ap1,*ap2; 533 534 PetscFunctionBegin; 535 for (i=0; i<m; i++) { 536 if (im[i] < 0) continue; 537 #if defined(PETSC_USE_DEBUG) 538 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 539 #endif 540 if (im[i] >= rstart && im[i] < rend) { 541 row = im[i] - rstart; 542 lastcol1 = -1; 543 rp1 = aj + ai[row]; 544 ap1 = aa + ai[row]; 545 rmax1 = aimax[row]; 546 nrow1 = ailen[row]; 547 low1 = 0; 548 high1 = nrow1; 549 lastcol2 = -1; 550 rp2 = bj + bi[row]; 551 ap2 = ba + bi[row]; 552 rmax2 = bimax[row]; 553 nrow2 = bilen[row]; 554 low2 = 0; 555 high2 = nrow2; 556 557 for (j=0; j<n; j++) { 558 if (roworiented) value = v[i*n+j]; 559 else value = v[i+j*m]; 560 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 561 if (in[j] >= cstart && in[j] < cend) { 562 col = in[j] - cstart; 563 nonew = a->nonew; 564 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 565 } else if (in[j] < 0) continue; 566 #if defined(PETSC_USE_DEBUG) 567 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 568 #endif 569 else { 570 if (mat->was_assembled) { 571 if (!aij->colmap) { 572 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 573 } 574 #if defined(PETSC_USE_CTABLE) 575 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 576 col--; 577 #else 578 col = aij->colmap[in[j]] - 1; 579 #endif 580 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 581 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 582 col = in[j]; 583 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 584 B = aij->B; 585 b = (Mat_SeqAIJ*)B->data; 586 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 587 rp2 = bj + bi[row]; 588 ap2 = ba + bi[row]; 589 rmax2 = bimax[row]; 590 nrow2 = bilen[row]; 591 low2 = 0; 592 high2 = nrow2; 593 bm = aij->B->rmap->n; 594 ba = b->a; 595 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 596 } else col = in[j]; 597 nonew = b->nonew; 598 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 599 } 600 } 601 } else { 602 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 603 if (!aij->donotstash) { 604 mat->assembled = PETSC_FALSE; 605 if (roworiented) { 606 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 607 } else { 608 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 609 } 610 } 611 } 612 } 613 PetscFunctionReturn(0); 614 } 615 616 #undef __FUNCT__ 617 #define __FUNCT__ "MatGetValues_MPIAIJ" 618 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 619 { 620 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 621 PetscErrorCode ierr; 622 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 623 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 624 625 PetscFunctionBegin; 626 for (i=0; i<m; i++) { 627 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 628 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 629 if (idxm[i] >= rstart && idxm[i] < rend) { 630 row = idxm[i] - rstart; 631 for (j=0; j<n; j++) { 632 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 633 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 634 if (idxn[j] >= cstart && idxn[j] < cend) { 635 col = idxn[j] - cstart; 636 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 637 } else { 638 if (!aij->colmap) { 639 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 640 } 641 #if defined(PETSC_USE_CTABLE) 642 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 643 col--; 644 #else 645 col = aij->colmap[idxn[j]] - 1; 646 #endif 647 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 648 else { 649 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 650 } 651 } 652 } 653 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 654 } 655 PetscFunctionReturn(0); 656 } 657 658 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 659 660 #undef __FUNCT__ 661 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 662 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 663 { 664 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 665 PetscErrorCode ierr; 666 PetscInt nstash,reallocs; 667 InsertMode addv; 668 669 PetscFunctionBegin; 670 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 671 672 /* make sure all processors are either in INSERTMODE or ADDMODE */ 673 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 674 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 675 mat->insertmode = addv; /* in case this processor had no cache */ 676 677 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 678 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 679 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 680 PetscFunctionReturn(0); 681 } 682 683 #undef __FUNCT__ 684 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 686 { 687 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 688 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 689 PetscErrorCode ierr; 690 PetscMPIInt n; 691 PetscInt i,j,rstart,ncols,flg; 692 PetscInt *row,*col; 693 PetscBool other_disassembled; 694 PetscScalar *val; 695 InsertMode addv = mat->insertmode; 696 697 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 698 699 PetscFunctionBegin; 700 if (!aij->donotstash && !mat->nooffprocentries) { 701 while (1) { 702 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 703 if (!flg) break; 704 705 for (i=0; i<n; ) { 706 /* Now identify the consecutive vals belonging to the same row */ 707 for (j=i,rstart=row[j]; j<n; j++) { 708 if (row[j] != rstart) break; 709 } 710 if (j < n) ncols = j-i; 711 else ncols = n-i; 712 /* Now assemble all these values with a single function call */ 713 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 714 715 i = j; 716 } 717 } 718 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 719 } 720 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 721 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 722 723 /* determine if any processor has disassembled, if so we must 724 also disassemble ourselfs, in order that we may reassemble. */ 725 /* 726 if nonzero structure of submatrix B cannot change then we know that 727 no processor disassembled thus we can skip this stuff 728 */ 729 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 730 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 731 if (mat->was_assembled && !other_disassembled) { 732 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 733 } 734 } 735 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 736 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 737 } 738 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 739 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 740 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 741 742 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 743 744 aij->rowvalues = 0; 745 746 /* used by MatAXPY() */ 747 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 748 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 749 750 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 751 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 752 753 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 754 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 755 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 756 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 757 } 758 PetscFunctionReturn(0); 759 } 760 761 #undef __FUNCT__ 762 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 763 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 764 { 765 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 766 PetscErrorCode ierr; 767 768 PetscFunctionBegin; 769 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 770 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 771 PetscFunctionReturn(0); 772 } 773 774 #undef __FUNCT__ 775 #define __FUNCT__ "MatZeroRows_MPIAIJ" 776 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 777 { 778 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 779 PetscInt *owners = A->rmap->range; 780 PetscInt n = A->rmap->n; 781 PetscMPIInt size = mat->size; 782 PetscSF sf; 783 PetscInt *lrows; 784 PetscSFNode *rrows; 785 PetscInt lastidx = -1, r, p = 0, len = 0; 786 PetscErrorCode ierr; 787 788 PetscFunctionBegin; 789 /* Create SF where leaves are input rows and roots are owned rows */ 790 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 791 for (r = 0; r < n; ++r) lrows[r] = -1; 792 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 793 for (r = 0; r < N; ++r) { 794 const PetscInt idx = rows[r]; 795 PetscBool found = PETSC_FALSE; 796 /* Trick for efficient searching for sorted rows */ 797 if (lastidx > idx) p = 0; 798 lastidx = idx; 799 for (; p < size; ++p) { 800 if (idx >= owners[p] && idx < owners[p+1]) { 801 rrows[r].rank = p; 802 rrows[r].index = rows[r] - owners[p]; 803 found = PETSC_TRUE; 804 break; 805 } 806 } 807 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 808 } 809 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 810 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 811 /* Collect flags for rows to be zeroed */ 812 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 813 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 814 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 815 /* Compress and put in row numbers */ 816 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 817 /* fix right hand side if needed */ 818 if (x && b) { 819 const PetscScalar *xx; 820 PetscScalar *bb; 821 822 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 823 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 824 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 825 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 826 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 827 } 828 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 829 ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr); 830 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 831 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 832 } else if (diag != 0.0) { 833 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 834 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 835 for (r = 0; r < len; ++r) { 836 const PetscInt row = lrows[r] + A->rmap->rstart; 837 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 838 } 839 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 840 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 841 } else { 842 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 843 } 844 ierr = PetscFree(lrows);CHKERRQ(ierr); 845 846 /* only change matrix nonzero state if pattern was allowed to be changed */ 847 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 848 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 849 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 850 } 851 PetscFunctionReturn(0); 852 } 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 856 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 857 { 858 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 859 PetscErrorCode ierr; 860 PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1; 861 PetscInt i,j,r,m,p = 0,len = 0; 862 PetscInt *lrows,*owners = A->rmap->range; 863 PetscSFNode *rrows; 864 PetscSF sf; 865 const PetscScalar *xx; 866 PetscScalar *bb,*mask; 867 Vec xmask,lmask; 868 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 869 const PetscInt *aj, *ii,*ridx; 870 PetscScalar *aa; 871 #if defined(PETSC_DEBUG) 872 PetscBool found = PETSC_FALSE; 873 #endif 874 875 PetscFunctionBegin; 876 /* Create SF where leaves are input rows and roots are owned rows */ 877 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 878 for (r = 0; r < n; ++r) lrows[r] = -1; 879 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 880 for (r = 0; r < N; ++r) { 881 const PetscInt idx = rows[r]; 882 PetscBool found = PETSC_FALSE; 883 /* Trick for efficient searching for sorted rows */ 884 if (lastidx > idx) p = 0; 885 lastidx = idx; 886 for (; p < size; ++p) { 887 if (idx >= owners[p] && idx < owners[p+1]) { 888 rrows[r].rank = p; 889 rrows[r].index = rows[r] - owners[p]; 890 found = PETSC_TRUE; 891 break; 892 } 893 } 894 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 895 } 896 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 897 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 898 /* Collect flags for rows to be zeroed */ 899 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 900 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 901 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 902 /* Compress and put in row numbers */ 903 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 904 /* zero diagonal part of matrix */ 905 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 906 /* handle off diagonal part of matrix */ 907 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 908 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 909 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 910 for (i=0; i<len; i++) bb[lrows[i]] = 1; 911 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 912 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 913 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 914 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 915 if (x) { 916 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 918 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 919 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 920 } 921 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 922 /* remove zeroed rows of off diagonal matrix */ 923 ii = aij->i; 924 for (i=0; i<len; i++) { 925 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 926 } 927 /* loop over all elements of off process part of matrix zeroing removed columns*/ 928 if (aij->compressedrow.use) { 929 m = aij->compressedrow.nrows; 930 ii = aij->compressedrow.i; 931 ridx = aij->compressedrow.rindex; 932 for (i=0; i<m; i++) { 933 n = ii[i+1] - ii[i]; 934 aj = aij->j + ii[i]; 935 aa = aij->a + ii[i]; 936 937 for (j=0; j<n; j++) { 938 if (PetscAbsScalar(mask[*aj])) { 939 if (b) bb[*ridx] -= *aa*xx[*aj]; 940 *aa = 0.0; 941 } 942 aa++; 943 aj++; 944 } 945 ridx++; 946 } 947 } else { /* do not use compressed row format */ 948 m = l->B->rmap->n; 949 for (i=0; i<m; i++) { 950 n = ii[i+1] - ii[i]; 951 aj = aij->j + ii[i]; 952 aa = aij->a + ii[i]; 953 for (j=0; j<n; j++) { 954 if (PetscAbsScalar(mask[*aj])) { 955 if (b) bb[i] -= *aa*xx[*aj]; 956 *aa = 0.0; 957 } 958 aa++; 959 aj++; 960 } 961 } 962 } 963 if (x) { 964 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 965 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 966 } 967 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 968 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 969 ierr = PetscFree(lrows);CHKERRQ(ierr); 970 971 /* only change matrix nonzero state if pattern was allowed to be changed */ 972 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 973 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 974 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 975 } 976 PetscFunctionReturn(0); 977 } 978 979 #undef __FUNCT__ 980 #define __FUNCT__ "MatMult_MPIAIJ" 981 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 982 { 983 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 984 PetscErrorCode ierr; 985 PetscInt nt; 986 987 PetscFunctionBegin; 988 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 989 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 990 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 992 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 993 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 994 PetscFunctionReturn(0); 995 } 996 997 #undef __FUNCT__ 998 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 999 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1000 { 1001 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1002 PetscErrorCode ierr; 1003 1004 PetscFunctionBegin; 1005 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1011 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1012 { 1013 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1014 PetscErrorCode ierr; 1015 1016 PetscFunctionBegin; 1017 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1018 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1019 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1020 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1021 PetscFunctionReturn(0); 1022 } 1023 1024 #undef __FUNCT__ 1025 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1026 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1027 { 1028 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1029 PetscErrorCode ierr; 1030 PetscBool merged; 1031 1032 PetscFunctionBegin; 1033 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1034 /* do nondiagonal part */ 1035 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1036 if (!merged) { 1037 /* send it on its way */ 1038 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1039 /* do local part */ 1040 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1041 /* receive remote parts: note this assumes the values are not actually */ 1042 /* added in yy until the next line, */ 1043 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1044 } else { 1045 /* do local part */ 1046 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1047 /* send it on its way */ 1048 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1049 /* values actually were received in the Begin() but we need to call this nop */ 1050 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1051 } 1052 PetscFunctionReturn(0); 1053 } 1054 1055 #undef __FUNCT__ 1056 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1057 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1058 { 1059 MPI_Comm comm; 1060 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1061 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1062 IS Me,Notme; 1063 PetscErrorCode ierr; 1064 PetscInt M,N,first,last,*notme,i; 1065 PetscMPIInt size; 1066 1067 PetscFunctionBegin; 1068 /* Easy test: symmetric diagonal block */ 1069 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1070 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1071 if (!*f) PetscFunctionReturn(0); 1072 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1073 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1074 if (size == 1) PetscFunctionReturn(0); 1075 1076 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1077 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1078 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1079 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1080 for (i=0; i<first; i++) notme[i] = i; 1081 for (i=last; i<M; i++) notme[i-last+first] = i; 1082 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1083 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1084 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1085 Aoff = Aoffs[0]; 1086 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1087 Boff = Boffs[0]; 1088 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1089 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1090 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1091 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1092 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1093 ierr = PetscFree(notme);CHKERRQ(ierr); 1094 PetscFunctionReturn(0); 1095 } 1096 1097 #undef __FUNCT__ 1098 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1099 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1100 { 1101 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1102 PetscErrorCode ierr; 1103 1104 PetscFunctionBegin; 1105 /* do nondiagonal part */ 1106 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1107 /* send it on its way */ 1108 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1109 /* do local part */ 1110 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1111 /* receive remote parts */ 1112 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 /* 1117 This only works correctly for square matrices where the subblock A->A is the 1118 diagonal block 1119 */ 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1122 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1123 { 1124 PetscErrorCode ierr; 1125 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1126 1127 PetscFunctionBegin; 1128 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1129 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1130 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatScale_MPIAIJ" 1136 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1137 { 1138 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1139 PetscErrorCode ierr; 1140 1141 PetscFunctionBegin; 1142 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1143 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "MatDestroy_MPIAIJ" 1149 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1150 { 1151 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1152 PetscErrorCode ierr; 1153 1154 PetscFunctionBegin; 1155 #if defined(PETSC_USE_LOG) 1156 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1157 #endif 1158 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1159 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1160 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1161 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1162 #if defined(PETSC_USE_CTABLE) 1163 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1164 #else 1165 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1166 #endif 1167 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1168 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1169 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1170 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1171 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1172 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1173 1174 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1175 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1176 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1177 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1178 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1179 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1180 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1181 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1182 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1183 PetscFunctionReturn(0); 1184 } 1185 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1188 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1189 { 1190 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1191 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1192 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1193 PetscErrorCode ierr; 1194 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1195 int fd; 1196 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1197 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1198 PetscScalar *column_values; 1199 PetscInt message_count,flowcontrolcount; 1200 FILE *file; 1201 1202 PetscFunctionBegin; 1203 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1204 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1205 nz = A->nz + B->nz; 1206 if (!rank) { 1207 header[0] = MAT_FILE_CLASSID; 1208 header[1] = mat->rmap->N; 1209 header[2] = mat->cmap->N; 1210 1211 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1212 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1213 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1214 /* get largest number of rows any processor has */ 1215 rlen = mat->rmap->n; 1216 range = mat->rmap->range; 1217 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1218 } else { 1219 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1220 rlen = mat->rmap->n; 1221 } 1222 1223 /* load up the local row counts */ 1224 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1225 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1226 1227 /* store the row lengths to the file */ 1228 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1229 if (!rank) { 1230 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1231 for (i=1; i<size; i++) { 1232 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1233 rlen = range[i+1] - range[i]; 1234 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1235 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1236 } 1237 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1238 } else { 1239 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1240 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1241 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1242 } 1243 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1244 1245 /* load up the local column indices */ 1246 nzmax = nz; /* th processor needs space a largest processor needs */ 1247 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1248 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1249 cnt = 0; 1250 for (i=0; i<mat->rmap->n; i++) { 1251 for (j=B->i[i]; j<B->i[i+1]; j++) { 1252 if ((col = garray[B->j[j]]) > cstart) break; 1253 column_indices[cnt++] = col; 1254 } 1255 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1256 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1257 } 1258 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1259 1260 /* store the column indices to the file */ 1261 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1262 if (!rank) { 1263 MPI_Status status; 1264 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1265 for (i=1; i<size; i++) { 1266 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1267 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1268 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1269 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1270 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1271 } 1272 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1273 } else { 1274 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1275 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1276 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1277 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1278 } 1279 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1280 1281 /* load up the local column values */ 1282 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1283 cnt = 0; 1284 for (i=0; i<mat->rmap->n; i++) { 1285 for (j=B->i[i]; j<B->i[i+1]; j++) { 1286 if (garray[B->j[j]] > cstart) break; 1287 column_values[cnt++] = B->a[j]; 1288 } 1289 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1290 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1291 } 1292 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1293 1294 /* store the column values to the file */ 1295 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1296 if (!rank) { 1297 MPI_Status status; 1298 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1299 for (i=1; i<size; i++) { 1300 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1301 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1302 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1303 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1304 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1305 } 1306 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1307 } else { 1308 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1309 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1310 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1311 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1312 } 1313 ierr = PetscFree(column_values);CHKERRQ(ierr); 1314 1315 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1316 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1317 PetscFunctionReturn(0); 1318 } 1319 1320 #include <petscdraw.h> 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1323 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1324 { 1325 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1326 PetscErrorCode ierr; 1327 PetscMPIInt rank = aij->rank,size = aij->size; 1328 PetscBool isdraw,iascii,isbinary; 1329 PetscViewer sviewer; 1330 PetscViewerFormat format; 1331 1332 PetscFunctionBegin; 1333 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1334 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1335 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1336 if (iascii) { 1337 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1338 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1339 MatInfo info; 1340 PetscBool inodes; 1341 1342 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1343 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1344 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1345 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1346 if (!inodes) { 1347 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1348 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1349 } else { 1350 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1351 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1352 } 1353 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1354 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1355 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1356 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1357 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1358 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1359 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1360 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1363 PetscInt inodecount,inodelimit,*inodes; 1364 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1365 if (inodes) { 1366 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1367 } else { 1368 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1369 } 1370 PetscFunctionReturn(0); 1371 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1372 PetscFunctionReturn(0); 1373 } 1374 } else if (isbinary) { 1375 if (size == 1) { 1376 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1377 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1378 } else { 1379 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1380 } 1381 PetscFunctionReturn(0); 1382 } else if (isdraw) { 1383 PetscDraw draw; 1384 PetscBool isnull; 1385 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1386 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1387 } 1388 1389 { 1390 /* assemble the entire matrix onto first processor. */ 1391 Mat A; 1392 Mat_SeqAIJ *Aloc; 1393 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1394 MatScalar *a; 1395 1396 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1397 if (!rank) { 1398 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1399 } else { 1400 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1401 } 1402 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1403 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1404 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1405 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1406 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1407 1408 /* copy over the A part */ 1409 Aloc = (Mat_SeqAIJ*)aij->A->data; 1410 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1411 row = mat->rmap->rstart; 1412 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1413 for (i=0; i<m; i++) { 1414 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1415 row++; 1416 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1417 } 1418 aj = Aloc->j; 1419 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1420 1421 /* copy over the B part */ 1422 Aloc = (Mat_SeqAIJ*)aij->B->data; 1423 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1424 row = mat->rmap->rstart; 1425 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1426 ct = cols; 1427 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1428 for (i=0; i<m; i++) { 1429 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1430 row++; 1431 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1432 } 1433 ierr = PetscFree(ct);CHKERRQ(ierr); 1434 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1435 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1436 /* 1437 Everyone has to call to draw the matrix since the graphics waits are 1438 synchronized across all processors that share the PetscDraw object 1439 */ 1440 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1441 if (!rank) { 1442 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1443 } 1444 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1445 ierr = MatDestroy(&A);CHKERRQ(ierr); 1446 } 1447 PetscFunctionReturn(0); 1448 } 1449 1450 #undef __FUNCT__ 1451 #define __FUNCT__ "MatView_MPIAIJ" 1452 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1453 { 1454 PetscErrorCode ierr; 1455 PetscBool iascii,isdraw,issocket,isbinary; 1456 1457 PetscFunctionBegin; 1458 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1459 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1460 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1461 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1462 if (iascii || isdraw || isbinary || issocket) { 1463 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1464 } 1465 PetscFunctionReturn(0); 1466 } 1467 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "MatSOR_MPIAIJ" 1470 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1471 { 1472 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1473 PetscErrorCode ierr; 1474 Vec bb1 = 0; 1475 PetscBool hasop; 1476 1477 PetscFunctionBegin; 1478 if (flag == SOR_APPLY_UPPER) { 1479 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1480 PetscFunctionReturn(0); 1481 } 1482 1483 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1484 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1485 } 1486 1487 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1488 if (flag & SOR_ZERO_INITIAL_GUESS) { 1489 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1490 its--; 1491 } 1492 1493 while (its--) { 1494 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1495 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1496 1497 /* update rhs: bb1 = bb - B*x */ 1498 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1499 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1500 1501 /* local sweep */ 1502 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1503 } 1504 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1505 if (flag & SOR_ZERO_INITIAL_GUESS) { 1506 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1507 its--; 1508 } 1509 while (its--) { 1510 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1511 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1512 1513 /* update rhs: bb1 = bb - B*x */ 1514 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1515 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1516 1517 /* local sweep */ 1518 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1519 } 1520 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1521 if (flag & SOR_ZERO_INITIAL_GUESS) { 1522 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1523 its--; 1524 } 1525 while (its--) { 1526 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1527 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1528 1529 /* update rhs: bb1 = bb - B*x */ 1530 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1531 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1532 1533 /* local sweep */ 1534 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1535 } 1536 } else if (flag & SOR_EISENSTAT) { 1537 Vec xx1; 1538 1539 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1540 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1541 1542 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1543 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1544 if (!mat->diag) { 1545 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1546 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1547 } 1548 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1549 if (hasop) { 1550 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1551 } else { 1552 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1553 } 1554 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1555 1556 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1557 1558 /* local sweep */ 1559 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1560 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1561 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1562 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1563 1564 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1565 PetscFunctionReturn(0); 1566 } 1567 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatPermute_MPIAIJ" 1570 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1571 { 1572 Mat aA,aB,Aperm; 1573 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1574 PetscScalar *aa,*ba; 1575 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1576 PetscSF rowsf,sf; 1577 IS parcolp = NULL; 1578 PetscBool done; 1579 PetscErrorCode ierr; 1580 1581 PetscFunctionBegin; 1582 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1583 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1584 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1585 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1586 1587 /* Invert row permutation to find out where my rows should go */ 1588 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1589 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1590 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1591 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1592 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1593 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1594 1595 /* Invert column permutation to find out where my columns should go */ 1596 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1597 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1598 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1599 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1600 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1601 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1602 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1603 1604 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1605 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1606 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1607 1608 /* Find out where my gcols should go */ 1609 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1610 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1611 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1612 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1613 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1614 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1615 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1616 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1617 1618 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1619 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1620 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1621 for (i=0; i<m; i++) { 1622 PetscInt row = rdest[i],rowner; 1623 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1624 for (j=ai[i]; j<ai[i+1]; j++) { 1625 PetscInt cowner,col = cdest[aj[j]]; 1626 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1627 if (rowner == cowner) dnnz[i]++; 1628 else onnz[i]++; 1629 } 1630 for (j=bi[i]; j<bi[i+1]; j++) { 1631 PetscInt cowner,col = gcdest[bj[j]]; 1632 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1633 if (rowner == cowner) dnnz[i]++; 1634 else onnz[i]++; 1635 } 1636 } 1637 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1638 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1639 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1640 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1641 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1642 1643 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1644 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1645 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1646 for (i=0; i<m; i++) { 1647 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1648 PetscInt j0,rowlen; 1649 rowlen = ai[i+1] - ai[i]; 1650 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1651 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1652 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1653 } 1654 rowlen = bi[i+1] - bi[i]; 1655 for (j0=j=0; j<rowlen; j0=j) { 1656 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1657 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1658 } 1659 } 1660 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1661 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1662 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1663 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1664 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1665 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1666 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1667 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1668 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1669 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1670 *B = Aperm; 1671 PetscFunctionReturn(0); 1672 } 1673 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1676 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1677 { 1678 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1679 Mat A = mat->A,B = mat->B; 1680 PetscErrorCode ierr; 1681 PetscReal isend[5],irecv[5]; 1682 1683 PetscFunctionBegin; 1684 info->block_size = 1.0; 1685 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1686 1687 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1688 isend[3] = info->memory; isend[4] = info->mallocs; 1689 1690 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1691 1692 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1693 isend[3] += info->memory; isend[4] += info->mallocs; 1694 if (flag == MAT_LOCAL) { 1695 info->nz_used = isend[0]; 1696 info->nz_allocated = isend[1]; 1697 info->nz_unneeded = isend[2]; 1698 info->memory = isend[3]; 1699 info->mallocs = isend[4]; 1700 } else if (flag == MAT_GLOBAL_MAX) { 1701 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1702 1703 info->nz_used = irecv[0]; 1704 info->nz_allocated = irecv[1]; 1705 info->nz_unneeded = irecv[2]; 1706 info->memory = irecv[3]; 1707 info->mallocs = irecv[4]; 1708 } else if (flag == MAT_GLOBAL_SUM) { 1709 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1710 1711 info->nz_used = irecv[0]; 1712 info->nz_allocated = irecv[1]; 1713 info->nz_unneeded = irecv[2]; 1714 info->memory = irecv[3]; 1715 info->mallocs = irecv[4]; 1716 } 1717 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1718 info->fill_ratio_needed = 0; 1719 info->factor_mallocs = 0; 1720 PetscFunctionReturn(0); 1721 } 1722 1723 #undef __FUNCT__ 1724 #define __FUNCT__ "MatSetOption_MPIAIJ" 1725 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1726 { 1727 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1728 PetscErrorCode ierr; 1729 1730 PetscFunctionBegin; 1731 switch (op) { 1732 case MAT_NEW_NONZERO_LOCATIONS: 1733 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1734 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1735 case MAT_KEEP_NONZERO_PATTERN: 1736 case MAT_NEW_NONZERO_LOCATION_ERR: 1737 case MAT_USE_INODES: 1738 case MAT_IGNORE_ZERO_ENTRIES: 1739 MatCheckPreallocated(A,1); 1740 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1741 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1742 break; 1743 case MAT_ROW_ORIENTED: 1744 a->roworiented = flg; 1745 1746 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1747 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1748 break; 1749 case MAT_NEW_DIAGONALS: 1750 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1751 break; 1752 case MAT_IGNORE_OFF_PROC_ENTRIES: 1753 a->donotstash = flg; 1754 break; 1755 case MAT_SPD: 1756 A->spd_set = PETSC_TRUE; 1757 A->spd = flg; 1758 if (flg) { 1759 A->symmetric = PETSC_TRUE; 1760 A->structurally_symmetric = PETSC_TRUE; 1761 A->symmetric_set = PETSC_TRUE; 1762 A->structurally_symmetric_set = PETSC_TRUE; 1763 } 1764 break; 1765 case MAT_SYMMETRIC: 1766 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1767 break; 1768 case MAT_STRUCTURALLY_SYMMETRIC: 1769 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1770 break; 1771 case MAT_HERMITIAN: 1772 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1773 break; 1774 case MAT_SYMMETRY_ETERNAL: 1775 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1776 break; 1777 default: 1778 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1779 } 1780 PetscFunctionReturn(0); 1781 } 1782 1783 #undef __FUNCT__ 1784 #define __FUNCT__ "MatGetRow_MPIAIJ" 1785 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1786 { 1787 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1788 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1789 PetscErrorCode ierr; 1790 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1791 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1792 PetscInt *cmap,*idx_p; 1793 1794 PetscFunctionBegin; 1795 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1796 mat->getrowactive = PETSC_TRUE; 1797 1798 if (!mat->rowvalues && (idx || v)) { 1799 /* 1800 allocate enough space to hold information from the longest row. 1801 */ 1802 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1803 PetscInt max = 1,tmp; 1804 for (i=0; i<matin->rmap->n; i++) { 1805 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1806 if (max < tmp) max = tmp; 1807 } 1808 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1809 } 1810 1811 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1812 lrow = row - rstart; 1813 1814 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1815 if (!v) {pvA = 0; pvB = 0;} 1816 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1817 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1818 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1819 nztot = nzA + nzB; 1820 1821 cmap = mat->garray; 1822 if (v || idx) { 1823 if (nztot) { 1824 /* Sort by increasing column numbers, assuming A and B already sorted */ 1825 PetscInt imark = -1; 1826 if (v) { 1827 *v = v_p = mat->rowvalues; 1828 for (i=0; i<nzB; i++) { 1829 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1830 else break; 1831 } 1832 imark = i; 1833 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1834 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1835 } 1836 if (idx) { 1837 *idx = idx_p = mat->rowindices; 1838 if (imark > -1) { 1839 for (i=0; i<imark; i++) { 1840 idx_p[i] = cmap[cworkB[i]]; 1841 } 1842 } else { 1843 for (i=0; i<nzB; i++) { 1844 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1845 else break; 1846 } 1847 imark = i; 1848 } 1849 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1850 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1851 } 1852 } else { 1853 if (idx) *idx = 0; 1854 if (v) *v = 0; 1855 } 1856 } 1857 *nz = nztot; 1858 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1859 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1860 PetscFunctionReturn(0); 1861 } 1862 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1865 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1866 { 1867 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1868 1869 PetscFunctionBegin; 1870 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1871 aij->getrowactive = PETSC_FALSE; 1872 PetscFunctionReturn(0); 1873 } 1874 1875 #undef __FUNCT__ 1876 #define __FUNCT__ "MatNorm_MPIAIJ" 1877 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1878 { 1879 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1880 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1881 PetscErrorCode ierr; 1882 PetscInt i,j,cstart = mat->cmap->rstart; 1883 PetscReal sum = 0.0; 1884 MatScalar *v; 1885 1886 PetscFunctionBegin; 1887 if (aij->size == 1) { 1888 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1889 } else { 1890 if (type == NORM_FROBENIUS) { 1891 v = amat->a; 1892 for (i=0; i<amat->nz; i++) { 1893 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1894 } 1895 v = bmat->a; 1896 for (i=0; i<bmat->nz; i++) { 1897 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1898 } 1899 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1900 *norm = PetscSqrtReal(*norm); 1901 } else if (type == NORM_1) { /* max column norm */ 1902 PetscReal *tmp,*tmp2; 1903 PetscInt *jj,*garray = aij->garray; 1904 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1905 ierr = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1906 *norm = 0.0; 1907 v = amat->a; jj = amat->j; 1908 for (j=0; j<amat->nz; j++) { 1909 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1910 } 1911 v = bmat->a; jj = bmat->j; 1912 for (j=0; j<bmat->nz; j++) { 1913 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1914 } 1915 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1916 for (j=0; j<mat->cmap->N; j++) { 1917 if (tmp2[j] > *norm) *norm = tmp2[j]; 1918 } 1919 ierr = PetscFree(tmp);CHKERRQ(ierr); 1920 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1921 } else if (type == NORM_INFINITY) { /* max row norm */ 1922 PetscReal ntemp = 0.0; 1923 for (j=0; j<aij->A->rmap->n; j++) { 1924 v = amat->a + amat->i[j]; 1925 sum = 0.0; 1926 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1927 sum += PetscAbsScalar(*v); v++; 1928 } 1929 v = bmat->a + bmat->i[j]; 1930 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1931 sum += PetscAbsScalar(*v); v++; 1932 } 1933 if (sum > ntemp) ntemp = sum; 1934 } 1935 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1936 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1937 } 1938 PetscFunctionReturn(0); 1939 } 1940 1941 #undef __FUNCT__ 1942 #define __FUNCT__ "MatTranspose_MPIAIJ" 1943 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1944 { 1945 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1946 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1947 PetscErrorCode ierr; 1948 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1949 PetscInt cstart = A->cmap->rstart,ncol; 1950 Mat B; 1951 MatScalar *array; 1952 1953 PetscFunctionBegin; 1954 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1955 1956 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1957 ai = Aloc->i; aj = Aloc->j; 1958 bi = Bloc->i; bj = Bloc->j; 1959 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1960 PetscInt *d_nnz,*g_nnz,*o_nnz; 1961 PetscSFNode *oloc; 1962 PETSC_UNUSED PetscSF sf; 1963 1964 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1965 /* compute d_nnz for preallocation */ 1966 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1967 for (i=0; i<ai[ma]; i++) { 1968 d_nnz[aj[i]]++; 1969 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1970 } 1971 /* compute local off-diagonal contributions */ 1972 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1973 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1974 /* map those to global */ 1975 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1976 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1977 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1978 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1979 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1980 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1981 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1982 1983 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1984 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1985 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1986 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1987 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1988 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1989 } else { 1990 B = *matout; 1991 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1992 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1993 } 1994 1995 /* copy over the A part */ 1996 array = Aloc->a; 1997 row = A->rmap->rstart; 1998 for (i=0; i<ma; i++) { 1999 ncol = ai[i+1]-ai[i]; 2000 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2001 row++; 2002 array += ncol; aj += ncol; 2003 } 2004 aj = Aloc->j; 2005 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2006 2007 /* copy over the B part */ 2008 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2009 array = Bloc->a; 2010 row = A->rmap->rstart; 2011 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2012 cols_tmp = cols; 2013 for (i=0; i<mb; i++) { 2014 ncol = bi[i+1]-bi[i]; 2015 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2016 row++; 2017 array += ncol; cols_tmp += ncol; 2018 } 2019 ierr = PetscFree(cols);CHKERRQ(ierr); 2020 2021 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2022 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2023 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2024 *matout = B; 2025 } else { 2026 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2027 } 2028 PetscFunctionReturn(0); 2029 } 2030 2031 #undef __FUNCT__ 2032 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2033 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2034 { 2035 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2036 Mat a = aij->A,b = aij->B; 2037 PetscErrorCode ierr; 2038 PetscInt s1,s2,s3; 2039 2040 PetscFunctionBegin; 2041 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2042 if (rr) { 2043 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2044 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2045 /* Overlap communication with computation. */ 2046 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2047 } 2048 if (ll) { 2049 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2050 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2051 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2052 } 2053 /* scale the diagonal block */ 2054 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2055 2056 if (rr) { 2057 /* Do a scatter end and then right scale the off-diagonal block */ 2058 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2059 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2060 } 2061 PetscFunctionReturn(0); 2062 } 2063 2064 #undef __FUNCT__ 2065 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2066 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2067 { 2068 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2069 PetscErrorCode ierr; 2070 2071 PetscFunctionBegin; 2072 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatEqual_MPIAIJ" 2078 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2079 { 2080 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2081 Mat a,b,c,d; 2082 PetscBool flg; 2083 PetscErrorCode ierr; 2084 2085 PetscFunctionBegin; 2086 a = matA->A; b = matA->B; 2087 c = matB->A; d = matB->B; 2088 2089 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2090 if (flg) { 2091 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2092 } 2093 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2094 PetscFunctionReturn(0); 2095 } 2096 2097 #undef __FUNCT__ 2098 #define __FUNCT__ "MatCopy_MPIAIJ" 2099 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2100 { 2101 PetscErrorCode ierr; 2102 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2103 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2104 2105 PetscFunctionBegin; 2106 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2107 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2108 /* because of the column compression in the off-processor part of the matrix a->B, 2109 the number of columns in a->B and b->B may be different, hence we cannot call 2110 the MatCopy() directly on the two parts. If need be, we can provide a more 2111 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2112 then copying the submatrices */ 2113 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2114 } else { 2115 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2116 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2117 } 2118 PetscFunctionReturn(0); 2119 } 2120 2121 #undef __FUNCT__ 2122 #define __FUNCT__ "MatSetUp_MPIAIJ" 2123 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2124 { 2125 PetscErrorCode ierr; 2126 2127 PetscFunctionBegin; 2128 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2129 PetscFunctionReturn(0); 2130 } 2131 2132 #undef __FUNCT__ 2133 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2134 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2135 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2136 { 2137 PetscInt i,m=Y->rmap->N; 2138 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2139 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2140 const PetscInt *xi = x->i,*yi = y->i; 2141 2142 PetscFunctionBegin; 2143 /* Set the number of nonzeros in the new matrix */ 2144 for (i=0; i<m; i++) { 2145 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2146 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2147 nnz[i] = 0; 2148 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2149 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2150 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2151 nnz[i]++; 2152 } 2153 for (; k<nzy; k++) nnz[i]++; 2154 } 2155 PetscFunctionReturn(0); 2156 } 2157 2158 #undef __FUNCT__ 2159 #define __FUNCT__ "MatAXPY_MPIAIJ" 2160 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2161 { 2162 PetscErrorCode ierr; 2163 PetscInt i; 2164 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2165 PetscBLASInt bnz,one=1; 2166 Mat_SeqAIJ *x,*y; 2167 2168 PetscFunctionBegin; 2169 if (str == SAME_NONZERO_PATTERN) { 2170 PetscScalar alpha = a; 2171 x = (Mat_SeqAIJ*)xx->A->data; 2172 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2173 y = (Mat_SeqAIJ*)yy->A->data; 2174 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2175 x = (Mat_SeqAIJ*)xx->B->data; 2176 y = (Mat_SeqAIJ*)yy->B->data; 2177 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2178 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2179 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2180 } else if (str == SUBSET_NONZERO_PATTERN) { 2181 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2182 2183 x = (Mat_SeqAIJ*)xx->B->data; 2184 y = (Mat_SeqAIJ*)yy->B->data; 2185 if (y->xtoy && y->XtoY != xx->B) { 2186 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2187 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2188 } 2189 if (!y->xtoy) { /* get xtoy */ 2190 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2191 y->XtoY = xx->B; 2192 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2193 } 2194 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2195 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2196 } else { 2197 Mat B; 2198 PetscInt *nnz_d,*nnz_o; 2199 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2200 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2201 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2202 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2203 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2204 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2205 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2206 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2207 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2208 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2209 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2210 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2211 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2212 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2213 } 2214 PetscFunctionReturn(0); 2215 } 2216 2217 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2218 2219 #undef __FUNCT__ 2220 #define __FUNCT__ "MatConjugate_MPIAIJ" 2221 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2222 { 2223 #if defined(PETSC_USE_COMPLEX) 2224 PetscErrorCode ierr; 2225 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2226 2227 PetscFunctionBegin; 2228 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2229 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2230 #else 2231 PetscFunctionBegin; 2232 #endif 2233 PetscFunctionReturn(0); 2234 } 2235 2236 #undef __FUNCT__ 2237 #define __FUNCT__ "MatRealPart_MPIAIJ" 2238 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2239 { 2240 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2241 PetscErrorCode ierr; 2242 2243 PetscFunctionBegin; 2244 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2245 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2246 PetscFunctionReturn(0); 2247 } 2248 2249 #undef __FUNCT__ 2250 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2251 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2252 { 2253 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2254 PetscErrorCode ierr; 2255 2256 PetscFunctionBegin; 2257 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2258 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2259 PetscFunctionReturn(0); 2260 } 2261 2262 #if defined(PETSC_HAVE_PBGL) 2263 2264 #include <boost/parallel/mpi/bsp_process_group.hpp> 2265 #include <boost/graph/distributed/ilu_default_graph.hpp> 2266 #include <boost/graph/distributed/ilu_0_block.hpp> 2267 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2268 #include <boost/graph/distributed/petsc/interface.hpp> 2269 #include <boost/multi_array.hpp> 2270 #include <boost/parallel/distributed_property_map->hpp> 2271 2272 #undef __FUNCT__ 2273 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2274 /* 2275 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2276 */ 2277 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2278 { 2279 namespace petsc = boost::distributed::petsc; 2280 2281 namespace graph_dist = boost::graph::distributed; 2282 using boost::graph::distributed::ilu_default::process_group_type; 2283 using boost::graph::ilu_permuted; 2284 2285 PetscBool row_identity, col_identity; 2286 PetscContainer c; 2287 PetscInt m, n, M, N; 2288 PetscErrorCode ierr; 2289 2290 PetscFunctionBegin; 2291 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2292 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2293 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2294 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2295 2296 process_group_type pg; 2297 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2298 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2299 lgraph_type& level_graph = *lgraph_p; 2300 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2301 2302 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2303 ilu_permuted(level_graph); 2304 2305 /* put together the new matrix */ 2306 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2307 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2308 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2309 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2310 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2311 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2312 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2313 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2314 2315 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2316 ierr = PetscContainerSetPointer(c, lgraph_p); 2317 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2318 ierr = PetscContainerDestroy(&c); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 #undef __FUNCT__ 2323 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2324 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2325 { 2326 PetscFunctionBegin; 2327 PetscFunctionReturn(0); 2328 } 2329 2330 #undef __FUNCT__ 2331 #define __FUNCT__ "MatSolve_MPIAIJ" 2332 /* 2333 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2334 */ 2335 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2336 { 2337 namespace graph_dist = boost::graph::distributed; 2338 2339 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2340 lgraph_type *lgraph_p; 2341 PetscContainer c; 2342 PetscErrorCode ierr; 2343 2344 PetscFunctionBegin; 2345 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2346 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2347 ierr = VecCopy(b, x);CHKERRQ(ierr); 2348 2349 PetscScalar *array_x; 2350 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2351 PetscInt sx; 2352 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2353 2354 PetscScalar *array_b; 2355 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2356 PetscInt sb; 2357 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2358 2359 lgraph_type& level_graph = *lgraph_p; 2360 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2361 2362 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2363 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2364 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2365 2366 typedef boost::iterator_property_map<array_ref_type::iterator, 2367 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2368 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2369 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2370 2371 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2372 PetscFunctionReturn(0); 2373 } 2374 #endif 2375 2376 #undef __FUNCT__ 2377 #define __FUNCT__ "MatDestroy_MatRedundant" 2378 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2379 { 2380 PetscErrorCode ierr; 2381 Mat_Redundant *redund; 2382 PetscInt i; 2383 PetscMPIInt size; 2384 2385 PetscFunctionBegin; 2386 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2387 if (size == 1) { 2388 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2389 redund = a->redundant; 2390 } else { 2391 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2392 redund = a->redundant; 2393 } 2394 if (redund){ 2395 if (redund->matseq) { /* via MatGetSubMatrices() */ 2396 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2397 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2398 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2399 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2400 } else { 2401 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2402 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2403 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2404 for (i=0; i<redund->nrecvs; i++) { 2405 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2406 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2407 } 2408 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2409 } 2410 2411 if (redund->psubcomm) { 2412 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2413 } 2414 ierr = redund->Destroy(A);CHKERRQ(ierr); 2415 ierr = PetscFree(redund);CHKERRQ(ierr); 2416 } 2417 PetscFunctionReturn(0); 2418 } 2419 2420 #undef __FUNCT__ 2421 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2422 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2423 { 2424 PetscMPIInt rank,size; 2425 MPI_Comm comm; 2426 PetscErrorCode ierr; 2427 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2428 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2429 PetscInt *rowrange = mat->rmap->range; 2430 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2431 Mat A = aij->A,B=aij->B,C=*matredundant; 2432 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2433 PetscScalar *sbuf_a; 2434 PetscInt nzlocal=a->nz+b->nz; 2435 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2436 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2437 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2438 MatScalar *aworkA,*aworkB; 2439 PetscScalar *vals; 2440 PetscMPIInt tag1,tag2,tag3,imdex; 2441 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2442 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2443 MPI_Status recv_status,*send_status; 2444 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2445 PetscInt **rbuf_j=NULL; 2446 PetscScalar **rbuf_a=NULL; 2447 Mat_Redundant *redund =NULL; 2448 2449 PetscFunctionBegin; 2450 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2451 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2452 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2453 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2454 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2455 2456 if (reuse == MAT_REUSE_MATRIX) { 2457 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2458 if (subsize == 1) { 2459 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2460 redund = c->redundant; 2461 } else { 2462 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2463 redund = c->redundant; 2464 } 2465 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2466 2467 nsends = redund->nsends; 2468 nrecvs = redund->nrecvs; 2469 send_rank = redund->send_rank; 2470 recv_rank = redund->recv_rank; 2471 sbuf_nz = redund->sbuf_nz; 2472 rbuf_nz = redund->rbuf_nz; 2473 sbuf_j = redund->sbuf_j; 2474 sbuf_a = redund->sbuf_a; 2475 rbuf_j = redund->rbuf_j; 2476 rbuf_a = redund->rbuf_a; 2477 } 2478 2479 if (reuse == MAT_INITIAL_MATRIX) { 2480 PetscInt nleftover,np_subcomm; 2481 2482 /* get the destination processors' id send_rank, nsends and nrecvs */ 2483 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2484 2485 np_subcomm = size/nsubcomm; 2486 nleftover = size - nsubcomm*np_subcomm; 2487 2488 /* block of codes below is specific for INTERLACED */ 2489 /* ------------------------------------------------*/ 2490 nsends = 0; nrecvs = 0; 2491 for (i=0; i<size; i++) { 2492 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2493 send_rank[nsends++] = i; 2494 recv_rank[nrecvs++] = i; 2495 } 2496 } 2497 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2498 i = size-nleftover-1; 2499 j = 0; 2500 while (j < nsubcomm - nleftover) { 2501 send_rank[nsends++] = i; 2502 i--; j++; 2503 } 2504 } 2505 2506 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2507 for (i=0; i<nleftover; i++) { 2508 recv_rank[nrecvs++] = size-nleftover+i; 2509 } 2510 } 2511 /*----------------------------------------------*/ 2512 2513 /* allocate sbuf_j, sbuf_a */ 2514 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2515 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2516 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2517 /* 2518 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2519 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2520 */ 2521 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2522 2523 /* copy mat's local entries into the buffers */ 2524 if (reuse == MAT_INITIAL_MATRIX) { 2525 rownz_max = 0; 2526 rptr = sbuf_j; 2527 cols = sbuf_j + rend-rstart + 1; 2528 vals = sbuf_a; 2529 rptr[0] = 0; 2530 for (i=0; i<rend-rstart; i++) { 2531 row = i + rstart; 2532 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2533 ncols = nzA + nzB; 2534 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2535 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2536 /* load the column indices for this row into cols */ 2537 lwrite = 0; 2538 for (l=0; l<nzB; l++) { 2539 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2540 vals[lwrite] = aworkB[l]; 2541 cols[lwrite++] = ctmp; 2542 } 2543 } 2544 for (l=0; l<nzA; l++) { 2545 vals[lwrite] = aworkA[l]; 2546 cols[lwrite++] = cstart + cworkA[l]; 2547 } 2548 for (l=0; l<nzB; l++) { 2549 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2550 vals[lwrite] = aworkB[l]; 2551 cols[lwrite++] = ctmp; 2552 } 2553 } 2554 vals += ncols; 2555 cols += ncols; 2556 rptr[i+1] = rptr[i] + ncols; 2557 if (rownz_max < ncols) rownz_max = ncols; 2558 } 2559 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2560 } else { /* only copy matrix values into sbuf_a */ 2561 rptr = sbuf_j; 2562 vals = sbuf_a; 2563 rptr[0] = 0; 2564 for (i=0; i<rend-rstart; i++) { 2565 row = i + rstart; 2566 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2567 ncols = nzA + nzB; 2568 cworkB = b->j + b->i[i]; 2569 aworkA = a->a + a->i[i]; 2570 aworkB = b->a + b->i[i]; 2571 lwrite = 0; 2572 for (l=0; l<nzB; l++) { 2573 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2574 } 2575 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2576 for (l=0; l<nzB; l++) { 2577 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2578 } 2579 vals += ncols; 2580 rptr[i+1] = rptr[i] + ncols; 2581 } 2582 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2583 2584 /* send nzlocal to others, and recv other's nzlocal */ 2585 /*--------------------------------------------------*/ 2586 if (reuse == MAT_INITIAL_MATRIX) { 2587 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2588 2589 s_waits2 = s_waits3 + nsends; 2590 s_waits1 = s_waits2 + nsends; 2591 r_waits1 = s_waits1 + nsends; 2592 r_waits2 = r_waits1 + nrecvs; 2593 r_waits3 = r_waits2 + nrecvs; 2594 } else { 2595 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2596 2597 r_waits3 = s_waits3 + nsends; 2598 } 2599 2600 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2601 if (reuse == MAT_INITIAL_MATRIX) { 2602 /* get new tags to keep the communication clean */ 2603 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2604 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2605 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2606 2607 /* post receives of other's nzlocal */ 2608 for (i=0; i<nrecvs; i++) { 2609 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2610 } 2611 /* send nzlocal to others */ 2612 for (i=0; i<nsends; i++) { 2613 sbuf_nz[i] = nzlocal; 2614 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2615 } 2616 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2617 count = nrecvs; 2618 while (count) { 2619 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2620 2621 recv_rank[imdex] = recv_status.MPI_SOURCE; 2622 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2623 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2624 2625 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2626 2627 rbuf_nz[imdex] += i + 2; 2628 2629 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2630 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2631 count--; 2632 } 2633 /* wait on sends of nzlocal */ 2634 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2635 /* send mat->i,j to others, and recv from other's */ 2636 /*------------------------------------------------*/ 2637 for (i=0; i<nsends; i++) { 2638 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2639 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2640 } 2641 /* wait on receives of mat->i,j */ 2642 /*------------------------------*/ 2643 count = nrecvs; 2644 while (count) { 2645 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2646 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2647 count--; 2648 } 2649 /* wait on sends of mat->i,j */ 2650 /*---------------------------*/ 2651 if (nsends) { 2652 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2653 } 2654 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2655 2656 /* post receives, send and receive mat->a */ 2657 /*----------------------------------------*/ 2658 for (imdex=0; imdex<nrecvs; imdex++) { 2659 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2660 } 2661 for (i=0; i<nsends; i++) { 2662 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2663 } 2664 count = nrecvs; 2665 while (count) { 2666 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2667 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2668 count--; 2669 } 2670 if (nsends) { 2671 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2672 } 2673 2674 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2675 2676 /* create redundant matrix */ 2677 /*-------------------------*/ 2678 if (reuse == MAT_INITIAL_MATRIX) { 2679 const PetscInt *range; 2680 PetscInt rstart_sub,rend_sub,mloc_sub; 2681 2682 /* compute rownz_max for preallocation */ 2683 for (imdex=0; imdex<nrecvs; imdex++) { 2684 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2685 rptr = rbuf_j[imdex]; 2686 for (i=0; i<j; i++) { 2687 ncols = rptr[i+1] - rptr[i]; 2688 if (rownz_max < ncols) rownz_max = ncols; 2689 } 2690 } 2691 2692 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2693 2694 /* get local size of redundant matrix 2695 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2696 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2697 rstart_sub = range[nsubcomm*subrank]; 2698 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2699 rend_sub = range[nsubcomm*(subrank+1)]; 2700 } else { 2701 rend_sub = mat->rmap->N; 2702 } 2703 mloc_sub = rend_sub - rstart_sub; 2704 2705 if (M == N) { 2706 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2707 } else { /* non-square matrix */ 2708 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2709 } 2710 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2711 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2712 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2713 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2714 } else { 2715 C = *matredundant; 2716 } 2717 2718 /* insert local matrix entries */ 2719 rptr = sbuf_j; 2720 cols = sbuf_j + rend-rstart + 1; 2721 vals = sbuf_a; 2722 for (i=0; i<rend-rstart; i++) { 2723 row = i + rstart; 2724 ncols = rptr[i+1] - rptr[i]; 2725 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2726 vals += ncols; 2727 cols += ncols; 2728 } 2729 /* insert received matrix entries */ 2730 for (imdex=0; imdex<nrecvs; imdex++) { 2731 rstart = rowrange[recv_rank[imdex]]; 2732 rend = rowrange[recv_rank[imdex]+1]; 2733 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2734 rptr = rbuf_j[imdex]; 2735 cols = rbuf_j[imdex] + rend-rstart + 1; 2736 vals = rbuf_a[imdex]; 2737 for (i=0; i<rend-rstart; i++) { 2738 row = i + rstart; 2739 ncols = rptr[i+1] - rptr[i]; 2740 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2741 vals += ncols; 2742 cols += ncols; 2743 } 2744 } 2745 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2746 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2747 2748 if (reuse == MAT_INITIAL_MATRIX) { 2749 *matredundant = C; 2750 2751 /* create a supporting struct and attach it to C for reuse */ 2752 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2753 if (subsize == 1) { 2754 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2755 c->redundant = redund; 2756 } else { 2757 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2758 c->redundant = redund; 2759 } 2760 2761 redund->nzlocal = nzlocal; 2762 redund->nsends = nsends; 2763 redund->nrecvs = nrecvs; 2764 redund->send_rank = send_rank; 2765 redund->recv_rank = recv_rank; 2766 redund->sbuf_nz = sbuf_nz; 2767 redund->rbuf_nz = rbuf_nz; 2768 redund->sbuf_j = sbuf_j; 2769 redund->sbuf_a = sbuf_a; 2770 redund->rbuf_j = rbuf_j; 2771 redund->rbuf_a = rbuf_a; 2772 redund->psubcomm = NULL; 2773 2774 redund->Destroy = C->ops->destroy; 2775 C->ops->destroy = MatDestroy_MatRedundant; 2776 } 2777 PetscFunctionReturn(0); 2778 } 2779 2780 #undef __FUNCT__ 2781 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2782 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2783 { 2784 PetscErrorCode ierr; 2785 MPI_Comm comm; 2786 PetscMPIInt size,subsize; 2787 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2788 Mat_Redundant *redund=NULL; 2789 PetscSubcomm psubcomm=NULL; 2790 MPI_Comm subcomm_in=subcomm; 2791 Mat *matseq; 2792 IS isrow,iscol; 2793 2794 PetscFunctionBegin; 2795 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2796 if (reuse == MAT_INITIAL_MATRIX) { 2797 /* create psubcomm, then get subcomm */ 2798 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2799 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2800 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2801 2802 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2803 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2804 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2805 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2806 subcomm = psubcomm->comm; 2807 } else { /* retrieve psubcomm and subcomm */ 2808 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2809 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2810 if (subsize == 1) { 2811 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2812 redund = c->redundant; 2813 } else { 2814 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2815 redund = c->redundant; 2816 } 2817 psubcomm = redund->psubcomm; 2818 } 2819 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2820 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2821 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */ 2822 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2823 if (subsize == 1) { 2824 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2825 c->redundant->psubcomm = psubcomm; 2826 } else { 2827 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2828 c->redundant->psubcomm = psubcomm ; 2829 } 2830 } 2831 PetscFunctionReturn(0); 2832 } 2833 } 2834 2835 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2836 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2837 if (reuse == MAT_INITIAL_MATRIX) { 2838 /* create a local sequential matrix matseq[0] */ 2839 mloc_sub = PETSC_DECIDE; 2840 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2841 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2842 rstart = rend - mloc_sub; 2843 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2844 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2845 } else { /* reuse == MAT_REUSE_MATRIX */ 2846 if (subsize == 1) { 2847 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2848 redund = c->redundant; 2849 } else { 2850 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2851 redund = c->redundant; 2852 } 2853 2854 isrow = redund->isrow; 2855 iscol = redund->iscol; 2856 matseq = redund->matseq; 2857 } 2858 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2859 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2860 2861 if (reuse == MAT_INITIAL_MATRIX) { 2862 /* create a supporting struct and attach it to C for reuse */ 2863 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2864 if (subsize == 1) { 2865 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2866 c->redundant = redund; 2867 } else { 2868 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2869 c->redundant = redund; 2870 } 2871 redund->isrow = isrow; 2872 redund->iscol = iscol; 2873 redund->matseq = matseq; 2874 redund->psubcomm = psubcomm; 2875 redund->Destroy = (*matredundant)->ops->destroy; 2876 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 2877 } 2878 PetscFunctionReturn(0); 2879 } 2880 2881 #undef __FUNCT__ 2882 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2883 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2884 { 2885 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2886 PetscErrorCode ierr; 2887 PetscInt i,*idxb = 0; 2888 PetscScalar *va,*vb; 2889 Vec vtmp; 2890 2891 PetscFunctionBegin; 2892 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2893 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2894 if (idx) { 2895 for (i=0; i<A->rmap->n; i++) { 2896 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2897 } 2898 } 2899 2900 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2901 if (idx) { 2902 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2903 } 2904 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2905 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2906 2907 for (i=0; i<A->rmap->n; i++) { 2908 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2909 va[i] = vb[i]; 2910 if (idx) idx[i] = a->garray[idxb[i]]; 2911 } 2912 } 2913 2914 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2915 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2916 ierr = PetscFree(idxb);CHKERRQ(ierr); 2917 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2918 PetscFunctionReturn(0); 2919 } 2920 2921 #undef __FUNCT__ 2922 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2923 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2924 { 2925 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2926 PetscErrorCode ierr; 2927 PetscInt i,*idxb = 0; 2928 PetscScalar *va,*vb; 2929 Vec vtmp; 2930 2931 PetscFunctionBegin; 2932 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2933 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2934 if (idx) { 2935 for (i=0; i<A->cmap->n; i++) { 2936 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2937 } 2938 } 2939 2940 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2941 if (idx) { 2942 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2943 } 2944 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2945 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2946 2947 for (i=0; i<A->rmap->n; i++) { 2948 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2949 va[i] = vb[i]; 2950 if (idx) idx[i] = a->garray[idxb[i]]; 2951 } 2952 } 2953 2954 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2955 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2956 ierr = PetscFree(idxb);CHKERRQ(ierr); 2957 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2958 PetscFunctionReturn(0); 2959 } 2960 2961 #undef __FUNCT__ 2962 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2963 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2964 { 2965 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2966 PetscInt n = A->rmap->n; 2967 PetscInt cstart = A->cmap->rstart; 2968 PetscInt *cmap = mat->garray; 2969 PetscInt *diagIdx, *offdiagIdx; 2970 Vec diagV, offdiagV; 2971 PetscScalar *a, *diagA, *offdiagA; 2972 PetscInt r; 2973 PetscErrorCode ierr; 2974 2975 PetscFunctionBegin; 2976 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2977 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2978 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2979 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2980 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2981 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2982 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2983 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2984 for (r = 0; r < n; ++r) { 2985 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2986 a[r] = diagA[r]; 2987 idx[r] = cstart + diagIdx[r]; 2988 } else { 2989 a[r] = offdiagA[r]; 2990 idx[r] = cmap[offdiagIdx[r]]; 2991 } 2992 } 2993 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2994 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2995 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2996 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2997 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2998 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2999 PetscFunctionReturn(0); 3000 } 3001 3002 #undef __FUNCT__ 3003 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3004 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3005 { 3006 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3007 PetscInt n = A->rmap->n; 3008 PetscInt cstart = A->cmap->rstart; 3009 PetscInt *cmap = mat->garray; 3010 PetscInt *diagIdx, *offdiagIdx; 3011 Vec diagV, offdiagV; 3012 PetscScalar *a, *diagA, *offdiagA; 3013 PetscInt r; 3014 PetscErrorCode ierr; 3015 3016 PetscFunctionBegin; 3017 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 3018 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3019 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3020 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3021 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3022 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3023 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3024 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3025 for (r = 0; r < n; ++r) { 3026 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3027 a[r] = diagA[r]; 3028 idx[r] = cstart + diagIdx[r]; 3029 } else { 3030 a[r] = offdiagA[r]; 3031 idx[r] = cmap[offdiagIdx[r]]; 3032 } 3033 } 3034 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3035 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3036 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3037 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3038 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3039 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3040 PetscFunctionReturn(0); 3041 } 3042 3043 #undef __FUNCT__ 3044 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3045 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3046 { 3047 PetscErrorCode ierr; 3048 Mat *dummy; 3049 3050 PetscFunctionBegin; 3051 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3052 *newmat = *dummy; 3053 ierr = PetscFree(dummy);CHKERRQ(ierr); 3054 PetscFunctionReturn(0); 3055 } 3056 3057 #undef __FUNCT__ 3058 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3059 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3060 { 3061 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3062 PetscErrorCode ierr; 3063 3064 PetscFunctionBegin; 3065 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3066 PetscFunctionReturn(0); 3067 } 3068 3069 #undef __FUNCT__ 3070 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3071 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3072 { 3073 PetscErrorCode ierr; 3074 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3075 3076 PetscFunctionBegin; 3077 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3078 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3079 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3080 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3081 PetscFunctionReturn(0); 3082 } 3083 3084 /* -------------------------------------------------------------------*/ 3085 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3086 MatGetRow_MPIAIJ, 3087 MatRestoreRow_MPIAIJ, 3088 MatMult_MPIAIJ, 3089 /* 4*/ MatMultAdd_MPIAIJ, 3090 MatMultTranspose_MPIAIJ, 3091 MatMultTransposeAdd_MPIAIJ, 3092 #if defined(PETSC_HAVE_PBGL) 3093 MatSolve_MPIAIJ, 3094 #else 3095 0, 3096 #endif 3097 0, 3098 0, 3099 /*10*/ 0, 3100 0, 3101 0, 3102 MatSOR_MPIAIJ, 3103 MatTranspose_MPIAIJ, 3104 /*15*/ MatGetInfo_MPIAIJ, 3105 MatEqual_MPIAIJ, 3106 MatGetDiagonal_MPIAIJ, 3107 MatDiagonalScale_MPIAIJ, 3108 MatNorm_MPIAIJ, 3109 /*20*/ MatAssemblyBegin_MPIAIJ, 3110 MatAssemblyEnd_MPIAIJ, 3111 MatSetOption_MPIAIJ, 3112 MatZeroEntries_MPIAIJ, 3113 /*24*/ MatZeroRows_MPIAIJ, 3114 0, 3115 #if defined(PETSC_HAVE_PBGL) 3116 0, 3117 #else 3118 0, 3119 #endif 3120 0, 3121 0, 3122 /*29*/ MatSetUp_MPIAIJ, 3123 #if defined(PETSC_HAVE_PBGL) 3124 0, 3125 #else 3126 0, 3127 #endif 3128 0, 3129 0, 3130 0, 3131 /*34*/ MatDuplicate_MPIAIJ, 3132 0, 3133 0, 3134 0, 3135 0, 3136 /*39*/ MatAXPY_MPIAIJ, 3137 MatGetSubMatrices_MPIAIJ, 3138 MatIncreaseOverlap_MPIAIJ, 3139 MatGetValues_MPIAIJ, 3140 MatCopy_MPIAIJ, 3141 /*44*/ MatGetRowMax_MPIAIJ, 3142 MatScale_MPIAIJ, 3143 0, 3144 0, 3145 MatZeroRowsColumns_MPIAIJ, 3146 /*49*/ MatSetRandom_MPIAIJ, 3147 0, 3148 0, 3149 0, 3150 0, 3151 /*54*/ MatFDColoringCreate_MPIXAIJ, 3152 0, 3153 MatSetUnfactored_MPIAIJ, 3154 MatPermute_MPIAIJ, 3155 0, 3156 /*59*/ MatGetSubMatrix_MPIAIJ, 3157 MatDestroy_MPIAIJ, 3158 MatView_MPIAIJ, 3159 0, 3160 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3161 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3162 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3163 0, 3164 0, 3165 0, 3166 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3167 MatGetRowMinAbs_MPIAIJ, 3168 0, 3169 MatSetColoring_MPIAIJ, 3170 0, 3171 MatSetValuesAdifor_MPIAIJ, 3172 /*75*/ MatFDColoringApply_AIJ, 3173 0, 3174 0, 3175 0, 3176 MatFindZeroDiagonals_MPIAIJ, 3177 /*80*/ 0, 3178 0, 3179 0, 3180 /*83*/ MatLoad_MPIAIJ, 3181 0, 3182 0, 3183 0, 3184 0, 3185 0, 3186 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3187 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3188 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3189 MatPtAP_MPIAIJ_MPIAIJ, 3190 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3191 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3192 0, 3193 0, 3194 0, 3195 0, 3196 /*99*/ 0, 3197 0, 3198 0, 3199 MatConjugate_MPIAIJ, 3200 0, 3201 /*104*/MatSetValuesRow_MPIAIJ, 3202 MatRealPart_MPIAIJ, 3203 MatImaginaryPart_MPIAIJ, 3204 0, 3205 0, 3206 /*109*/0, 3207 MatGetRedundantMatrix_MPIAIJ, 3208 MatGetRowMin_MPIAIJ, 3209 0, 3210 0, 3211 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3212 0, 3213 0, 3214 0, 3215 0, 3216 /*119*/0, 3217 0, 3218 0, 3219 0, 3220 MatGetMultiProcBlock_MPIAIJ, 3221 /*124*/MatFindNonzeroRows_MPIAIJ, 3222 MatGetColumnNorms_MPIAIJ, 3223 MatInvertBlockDiagonal_MPIAIJ, 3224 0, 3225 MatGetSubMatricesParallel_MPIAIJ, 3226 /*129*/0, 3227 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3228 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3229 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3230 0, 3231 /*134*/0, 3232 0, 3233 0, 3234 0, 3235 0, 3236 /*139*/0, 3237 0, 3238 0, 3239 MatFDColoringSetUp_MPIXAIJ, 3240 MatFindOffBlockDiagonalEntries_MPIAIJ 3241 }; 3242 3243 /* ----------------------------------------------------------------------------------------*/ 3244 3245 #undef __FUNCT__ 3246 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3247 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3248 { 3249 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3250 PetscErrorCode ierr; 3251 3252 PetscFunctionBegin; 3253 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3254 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3255 PetscFunctionReturn(0); 3256 } 3257 3258 #undef __FUNCT__ 3259 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3260 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3261 { 3262 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3263 PetscErrorCode ierr; 3264 3265 PetscFunctionBegin; 3266 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3267 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3268 PetscFunctionReturn(0); 3269 } 3270 3271 #undef __FUNCT__ 3272 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3273 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3274 { 3275 Mat_MPIAIJ *b; 3276 PetscErrorCode ierr; 3277 3278 PetscFunctionBegin; 3279 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3280 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3281 b = (Mat_MPIAIJ*)B->data; 3282 3283 if (!B->preallocated) { 3284 /* Explicitly create 2 MATSEQAIJ matrices. */ 3285 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3286 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3287 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3288 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3289 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3290 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3291 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3292 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3293 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3294 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3295 } 3296 3297 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3298 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3299 B->preallocated = PETSC_TRUE; 3300 PetscFunctionReturn(0); 3301 } 3302 3303 #undef __FUNCT__ 3304 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3305 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3306 { 3307 Mat mat; 3308 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3309 PetscErrorCode ierr; 3310 3311 PetscFunctionBegin; 3312 *newmat = 0; 3313 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3314 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3315 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3316 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3317 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3318 a = (Mat_MPIAIJ*)mat->data; 3319 3320 mat->factortype = matin->factortype; 3321 mat->assembled = PETSC_TRUE; 3322 mat->insertmode = NOT_SET_VALUES; 3323 mat->preallocated = PETSC_TRUE; 3324 3325 a->size = oldmat->size; 3326 a->rank = oldmat->rank; 3327 a->donotstash = oldmat->donotstash; 3328 a->roworiented = oldmat->roworiented; 3329 a->rowindices = 0; 3330 a->rowvalues = 0; 3331 a->getrowactive = PETSC_FALSE; 3332 3333 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3334 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3335 3336 if (oldmat->colmap) { 3337 #if defined(PETSC_USE_CTABLE) 3338 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3339 #else 3340 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3341 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3342 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3343 #endif 3344 } else a->colmap = 0; 3345 if (oldmat->garray) { 3346 PetscInt len; 3347 len = oldmat->B->cmap->n; 3348 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3349 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3350 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3351 } else a->garray = 0; 3352 3353 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3354 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3355 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3356 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3357 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3358 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3359 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3360 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3361 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3362 *newmat = mat; 3363 PetscFunctionReturn(0); 3364 } 3365 3366 3367 3368 #undef __FUNCT__ 3369 #define __FUNCT__ "MatLoad_MPIAIJ" 3370 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3371 { 3372 PetscScalar *vals,*svals; 3373 MPI_Comm comm; 3374 PetscErrorCode ierr; 3375 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3376 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3377 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3378 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3379 PetscInt cend,cstart,n,*rowners,sizesset=1; 3380 int fd; 3381 PetscInt bs = 1; 3382 3383 PetscFunctionBegin; 3384 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3385 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3386 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3387 if (!rank) { 3388 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3389 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3390 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3391 } 3392 3393 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3394 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3395 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3396 3397 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3398 3399 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3400 M = header[1]; N = header[2]; 3401 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3402 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3403 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3404 3405 /* If global sizes are set, check if they are consistent with that given in the file */ 3406 if (sizesset) { 3407 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3408 } 3409 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3410 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3411 3412 /* determine ownership of all (block) rows */ 3413 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3414 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3415 else m = newMat->rmap->n; /* Set by user */ 3416 3417 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3418 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3419 3420 /* First process needs enough room for process with most rows */ 3421 if (!rank) { 3422 mmax = rowners[1]; 3423 for (i=2; i<=size; i++) { 3424 mmax = PetscMax(mmax, rowners[i]); 3425 } 3426 } else mmax = -1; /* unused, but compilers complain */ 3427 3428 rowners[0] = 0; 3429 for (i=2; i<=size; i++) { 3430 rowners[i] += rowners[i-1]; 3431 } 3432 rstart = rowners[rank]; 3433 rend = rowners[rank+1]; 3434 3435 /* distribute row lengths to all processors */ 3436 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3437 if (!rank) { 3438 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3439 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3440 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3441 for (j=0; j<m; j++) { 3442 procsnz[0] += ourlens[j]; 3443 } 3444 for (i=1; i<size; i++) { 3445 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3446 /* calculate the number of nonzeros on each processor */ 3447 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3448 procsnz[i] += rowlengths[j]; 3449 } 3450 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3451 } 3452 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3453 } else { 3454 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3455 } 3456 3457 if (!rank) { 3458 /* determine max buffer needed and allocate it */ 3459 maxnz = 0; 3460 for (i=0; i<size; i++) { 3461 maxnz = PetscMax(maxnz,procsnz[i]); 3462 } 3463 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3464 3465 /* read in my part of the matrix column indices */ 3466 nz = procsnz[0]; 3467 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3468 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3469 3470 /* read in every one elses and ship off */ 3471 for (i=1; i<size; i++) { 3472 nz = procsnz[i]; 3473 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3474 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3475 } 3476 ierr = PetscFree(cols);CHKERRQ(ierr); 3477 } else { 3478 /* determine buffer space needed for message */ 3479 nz = 0; 3480 for (i=0; i<m; i++) { 3481 nz += ourlens[i]; 3482 } 3483 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3484 3485 /* receive message of column indices*/ 3486 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3487 } 3488 3489 /* determine column ownership if matrix is not square */ 3490 if (N != M) { 3491 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3492 else n = newMat->cmap->n; 3493 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3494 cstart = cend - n; 3495 } else { 3496 cstart = rstart; 3497 cend = rend; 3498 n = cend - cstart; 3499 } 3500 3501 /* loop over local rows, determining number of off diagonal entries */ 3502 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3503 jj = 0; 3504 for (i=0; i<m; i++) { 3505 for (j=0; j<ourlens[i]; j++) { 3506 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3507 jj++; 3508 } 3509 } 3510 3511 for (i=0; i<m; i++) { 3512 ourlens[i] -= offlens[i]; 3513 } 3514 if (!sizesset) { 3515 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3516 } 3517 3518 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3519 3520 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3521 3522 for (i=0; i<m; i++) { 3523 ourlens[i] += offlens[i]; 3524 } 3525 3526 if (!rank) { 3527 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3528 3529 /* read in my part of the matrix numerical values */ 3530 nz = procsnz[0]; 3531 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3532 3533 /* insert into matrix */ 3534 jj = rstart; 3535 smycols = mycols; 3536 svals = vals; 3537 for (i=0; i<m; i++) { 3538 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3539 smycols += ourlens[i]; 3540 svals += ourlens[i]; 3541 jj++; 3542 } 3543 3544 /* read in other processors and ship out */ 3545 for (i=1; i<size; i++) { 3546 nz = procsnz[i]; 3547 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3548 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3549 } 3550 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3551 } else { 3552 /* receive numeric values */ 3553 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3554 3555 /* receive message of values*/ 3556 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3557 3558 /* insert into matrix */ 3559 jj = rstart; 3560 smycols = mycols; 3561 svals = vals; 3562 for (i=0; i<m; i++) { 3563 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3564 smycols += ourlens[i]; 3565 svals += ourlens[i]; 3566 jj++; 3567 } 3568 } 3569 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3570 ierr = PetscFree(vals);CHKERRQ(ierr); 3571 ierr = PetscFree(mycols);CHKERRQ(ierr); 3572 ierr = PetscFree(rowners);CHKERRQ(ierr); 3573 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3574 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3575 PetscFunctionReturn(0); 3576 } 3577 3578 #undef __FUNCT__ 3579 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3580 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3581 { 3582 PetscErrorCode ierr; 3583 IS iscol_local; 3584 PetscInt csize; 3585 3586 PetscFunctionBegin; 3587 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3588 if (call == MAT_REUSE_MATRIX) { 3589 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3590 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3591 } else { 3592 PetscInt cbs; 3593 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3594 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3595 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3596 } 3597 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3598 if (call == MAT_INITIAL_MATRIX) { 3599 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3600 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3601 } 3602 PetscFunctionReturn(0); 3603 } 3604 3605 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3606 #undef __FUNCT__ 3607 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3608 /* 3609 Not great since it makes two copies of the submatrix, first an SeqAIJ 3610 in local and then by concatenating the local matrices the end result. 3611 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3612 3613 Note: This requires a sequential iscol with all indices. 3614 */ 3615 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3616 { 3617 PetscErrorCode ierr; 3618 PetscMPIInt rank,size; 3619 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3620 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3621 PetscBool allcolumns, colflag; 3622 Mat M,Mreuse; 3623 MatScalar *vwork,*aa; 3624 MPI_Comm comm; 3625 Mat_SeqAIJ *aij; 3626 3627 PetscFunctionBegin; 3628 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3629 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3630 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3631 3632 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3633 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3634 if (colflag && ncol == mat->cmap->N) { 3635 allcolumns = PETSC_TRUE; 3636 } else { 3637 allcolumns = PETSC_FALSE; 3638 } 3639 if (call == MAT_REUSE_MATRIX) { 3640 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3641 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3642 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3643 } else { 3644 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3645 } 3646 3647 /* 3648 m - number of local rows 3649 n - number of columns (same on all processors) 3650 rstart - first row in new global matrix generated 3651 */ 3652 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3653 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3654 if (call == MAT_INITIAL_MATRIX) { 3655 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3656 ii = aij->i; 3657 jj = aij->j; 3658 3659 /* 3660 Determine the number of non-zeros in the diagonal and off-diagonal 3661 portions of the matrix in order to do correct preallocation 3662 */ 3663 3664 /* first get start and end of "diagonal" columns */ 3665 if (csize == PETSC_DECIDE) { 3666 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3667 if (mglobal == n) { /* square matrix */ 3668 nlocal = m; 3669 } else { 3670 nlocal = n/size + ((n % size) > rank); 3671 } 3672 } else { 3673 nlocal = csize; 3674 } 3675 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3676 rstart = rend - nlocal; 3677 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3678 3679 /* next, compute all the lengths */ 3680 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3681 olens = dlens + m; 3682 for (i=0; i<m; i++) { 3683 jend = ii[i+1] - ii[i]; 3684 olen = 0; 3685 dlen = 0; 3686 for (j=0; j<jend; j++) { 3687 if (*jj < rstart || *jj >= rend) olen++; 3688 else dlen++; 3689 jj++; 3690 } 3691 olens[i] = olen; 3692 dlens[i] = dlen; 3693 } 3694 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3695 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3696 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3697 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3698 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3699 ierr = PetscFree(dlens);CHKERRQ(ierr); 3700 } else { 3701 PetscInt ml,nl; 3702 3703 M = *newmat; 3704 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3705 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3706 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3707 /* 3708 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3709 rather than the slower MatSetValues(). 3710 */ 3711 M->was_assembled = PETSC_TRUE; 3712 M->assembled = PETSC_FALSE; 3713 } 3714 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3715 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3716 ii = aij->i; 3717 jj = aij->j; 3718 aa = aij->a; 3719 for (i=0; i<m; i++) { 3720 row = rstart + i; 3721 nz = ii[i+1] - ii[i]; 3722 cwork = jj; jj += nz; 3723 vwork = aa; aa += nz; 3724 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3725 } 3726 3727 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3728 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3729 *newmat = M; 3730 3731 /* save submatrix used in processor for next request */ 3732 if (call == MAT_INITIAL_MATRIX) { 3733 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3734 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3735 } 3736 PetscFunctionReturn(0); 3737 } 3738 3739 #undef __FUNCT__ 3740 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3741 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3742 { 3743 PetscInt m,cstart, cend,j,nnz,i,d; 3744 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3745 const PetscInt *JJ; 3746 PetscScalar *values; 3747 PetscErrorCode ierr; 3748 3749 PetscFunctionBegin; 3750 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3751 3752 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3753 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3754 m = B->rmap->n; 3755 cstart = B->cmap->rstart; 3756 cend = B->cmap->rend; 3757 rstart = B->rmap->rstart; 3758 3759 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3760 3761 #if defined(PETSC_USE_DEBUGGING) 3762 for (i=0; i<m; i++) { 3763 nnz = Ii[i+1]- Ii[i]; 3764 JJ = J + Ii[i]; 3765 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3766 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3767 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3768 } 3769 #endif 3770 3771 for (i=0; i<m; i++) { 3772 nnz = Ii[i+1]- Ii[i]; 3773 JJ = J + Ii[i]; 3774 nnz_max = PetscMax(nnz_max,nnz); 3775 d = 0; 3776 for (j=0; j<nnz; j++) { 3777 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3778 } 3779 d_nnz[i] = d; 3780 o_nnz[i] = nnz - d; 3781 } 3782 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3783 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3784 3785 if (v) values = (PetscScalar*)v; 3786 else { 3787 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3788 } 3789 3790 for (i=0; i<m; i++) { 3791 ii = i + rstart; 3792 nnz = Ii[i+1]- Ii[i]; 3793 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3794 } 3795 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3796 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3797 3798 if (!v) { 3799 ierr = PetscFree(values);CHKERRQ(ierr); 3800 } 3801 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3802 PetscFunctionReturn(0); 3803 } 3804 3805 #undef __FUNCT__ 3806 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3807 /*@ 3808 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3809 (the default parallel PETSc format). 3810 3811 Collective on MPI_Comm 3812 3813 Input Parameters: 3814 + B - the matrix 3815 . i - the indices into j for the start of each local row (starts with zero) 3816 . j - the column indices for each local row (starts with zero) 3817 - v - optional values in the matrix 3818 3819 Level: developer 3820 3821 Notes: 3822 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3823 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3824 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3825 3826 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3827 3828 The format which is used for the sparse matrix input, is equivalent to a 3829 row-major ordering.. i.e for the following matrix, the input data expected is 3830 as shown: 3831 3832 1 0 0 3833 2 0 3 P0 3834 ------- 3835 4 5 6 P1 3836 3837 Process0 [P0]: rows_owned=[0,1] 3838 i = {0,1,3} [size = nrow+1 = 2+1] 3839 j = {0,0,2} [size = nz = 6] 3840 v = {1,2,3} [size = nz = 6] 3841 3842 Process1 [P1]: rows_owned=[2] 3843 i = {0,3} [size = nrow+1 = 1+1] 3844 j = {0,1,2} [size = nz = 6] 3845 v = {4,5,6} [size = nz = 6] 3846 3847 .keywords: matrix, aij, compressed row, sparse, parallel 3848 3849 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3850 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3851 @*/ 3852 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3853 { 3854 PetscErrorCode ierr; 3855 3856 PetscFunctionBegin; 3857 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3858 PetscFunctionReturn(0); 3859 } 3860 3861 #undef __FUNCT__ 3862 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3863 /*@C 3864 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3865 (the default parallel PETSc format). For good matrix assembly performance 3866 the user should preallocate the matrix storage by setting the parameters 3867 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3868 performance can be increased by more than a factor of 50. 3869 3870 Collective on MPI_Comm 3871 3872 Input Parameters: 3873 + A - the matrix 3874 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3875 (same value is used for all local rows) 3876 . d_nnz - array containing the number of nonzeros in the various rows of the 3877 DIAGONAL portion of the local submatrix (possibly different for each row) 3878 or NULL, if d_nz is used to specify the nonzero structure. 3879 The size of this array is equal to the number of local rows, i.e 'm'. 3880 For matrices that will be factored, you must leave room for (and set) 3881 the diagonal entry even if it is zero. 3882 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3883 submatrix (same value is used for all local rows). 3884 - o_nnz - array containing the number of nonzeros in the various rows of the 3885 OFF-DIAGONAL portion of the local submatrix (possibly different for 3886 each row) or NULL, if o_nz is used to specify the nonzero 3887 structure. The size of this array is equal to the number 3888 of local rows, i.e 'm'. 3889 3890 If the *_nnz parameter is given then the *_nz parameter is ignored 3891 3892 The AIJ format (also called the Yale sparse matrix format or 3893 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3894 storage. The stored row and column indices begin with zero. 3895 See Users-Manual: ch_mat for details. 3896 3897 The parallel matrix is partitioned such that the first m0 rows belong to 3898 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3899 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3900 3901 The DIAGONAL portion of the local submatrix of a processor can be defined 3902 as the submatrix which is obtained by extraction the part corresponding to 3903 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3904 first row that belongs to the processor, r2 is the last row belonging to 3905 the this processor, and c1-c2 is range of indices of the local part of a 3906 vector suitable for applying the matrix to. This is an mxn matrix. In the 3907 common case of a square matrix, the row and column ranges are the same and 3908 the DIAGONAL part is also square. The remaining portion of the local 3909 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3910 3911 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3912 3913 You can call MatGetInfo() to get information on how effective the preallocation was; 3914 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3915 You can also run with the option -info and look for messages with the string 3916 malloc in them to see if additional memory allocation was needed. 3917 3918 Example usage: 3919 3920 Consider the following 8x8 matrix with 34 non-zero values, that is 3921 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3922 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3923 as follows: 3924 3925 .vb 3926 1 2 0 | 0 3 0 | 0 4 3927 Proc0 0 5 6 | 7 0 0 | 8 0 3928 9 0 10 | 11 0 0 | 12 0 3929 ------------------------------------- 3930 13 0 14 | 15 16 17 | 0 0 3931 Proc1 0 18 0 | 19 20 21 | 0 0 3932 0 0 0 | 22 23 0 | 24 0 3933 ------------------------------------- 3934 Proc2 25 26 27 | 0 0 28 | 29 0 3935 30 0 0 | 31 32 33 | 0 34 3936 .ve 3937 3938 This can be represented as a collection of submatrices as: 3939 3940 .vb 3941 A B C 3942 D E F 3943 G H I 3944 .ve 3945 3946 Where the submatrices A,B,C are owned by proc0, D,E,F are 3947 owned by proc1, G,H,I are owned by proc2. 3948 3949 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3950 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3951 The 'M','N' parameters are 8,8, and have the same values on all procs. 3952 3953 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3954 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3955 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3956 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3957 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3958 matrix, ans [DF] as another SeqAIJ matrix. 3959 3960 When d_nz, o_nz parameters are specified, d_nz storage elements are 3961 allocated for every row of the local diagonal submatrix, and o_nz 3962 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3963 One way to choose d_nz and o_nz is to use the max nonzerors per local 3964 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3965 In this case, the values of d_nz,o_nz are: 3966 .vb 3967 proc0 : dnz = 2, o_nz = 2 3968 proc1 : dnz = 3, o_nz = 2 3969 proc2 : dnz = 1, o_nz = 4 3970 .ve 3971 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3972 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3973 for proc3. i.e we are using 12+15+10=37 storage locations to store 3974 34 values. 3975 3976 When d_nnz, o_nnz parameters are specified, the storage is specified 3977 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3978 In the above case the values for d_nnz,o_nnz are: 3979 .vb 3980 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3981 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3982 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3983 .ve 3984 Here the space allocated is sum of all the above values i.e 34, and 3985 hence pre-allocation is perfect. 3986 3987 Level: intermediate 3988 3989 .keywords: matrix, aij, compressed row, sparse, parallel 3990 3991 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3992 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3993 @*/ 3994 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3995 { 3996 PetscErrorCode ierr; 3997 3998 PetscFunctionBegin; 3999 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4000 PetscValidType(B,1); 4001 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4002 PetscFunctionReturn(0); 4003 } 4004 4005 #undef __FUNCT__ 4006 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4007 /*@ 4008 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4009 CSR format the local rows. 4010 4011 Collective on MPI_Comm 4012 4013 Input Parameters: 4014 + comm - MPI communicator 4015 . m - number of local rows (Cannot be PETSC_DECIDE) 4016 . n - This value should be the same as the local size used in creating the 4017 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4018 calculated if N is given) For square matrices n is almost always m. 4019 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4020 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4021 . i - row indices 4022 . j - column indices 4023 - a - matrix values 4024 4025 Output Parameter: 4026 . mat - the matrix 4027 4028 Level: intermediate 4029 4030 Notes: 4031 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4032 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4033 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4034 4035 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4036 4037 The format which is used for the sparse matrix input, is equivalent to a 4038 row-major ordering.. i.e for the following matrix, the input data expected is 4039 as shown: 4040 4041 1 0 0 4042 2 0 3 P0 4043 ------- 4044 4 5 6 P1 4045 4046 Process0 [P0]: rows_owned=[0,1] 4047 i = {0,1,3} [size = nrow+1 = 2+1] 4048 j = {0,0,2} [size = nz = 6] 4049 v = {1,2,3} [size = nz = 6] 4050 4051 Process1 [P1]: rows_owned=[2] 4052 i = {0,3} [size = nrow+1 = 1+1] 4053 j = {0,1,2} [size = nz = 6] 4054 v = {4,5,6} [size = nz = 6] 4055 4056 .keywords: matrix, aij, compressed row, sparse, parallel 4057 4058 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4059 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4060 @*/ 4061 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4062 { 4063 PetscErrorCode ierr; 4064 4065 PetscFunctionBegin; 4066 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4067 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4068 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4069 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4070 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4071 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4072 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4073 PetscFunctionReturn(0); 4074 } 4075 4076 #undef __FUNCT__ 4077 #define __FUNCT__ "MatCreateAIJ" 4078 /*@C 4079 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4080 (the default parallel PETSc format). For good matrix assembly performance 4081 the user should preallocate the matrix storage by setting the parameters 4082 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4083 performance can be increased by more than a factor of 50. 4084 4085 Collective on MPI_Comm 4086 4087 Input Parameters: 4088 + comm - MPI communicator 4089 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4090 This value should be the same as the local size used in creating the 4091 y vector for the matrix-vector product y = Ax. 4092 . n - This value should be the same as the local size used in creating the 4093 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4094 calculated if N is given) For square matrices n is almost always m. 4095 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4096 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4097 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4098 (same value is used for all local rows) 4099 . d_nnz - array containing the number of nonzeros in the various rows of the 4100 DIAGONAL portion of the local submatrix (possibly different for each row) 4101 or NULL, if d_nz is used to specify the nonzero structure. 4102 The size of this array is equal to the number of local rows, i.e 'm'. 4103 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4104 submatrix (same value is used for all local rows). 4105 - o_nnz - array containing the number of nonzeros in the various rows of the 4106 OFF-DIAGONAL portion of the local submatrix (possibly different for 4107 each row) or NULL, if o_nz is used to specify the nonzero 4108 structure. The size of this array is equal to the number 4109 of local rows, i.e 'm'. 4110 4111 Output Parameter: 4112 . A - the matrix 4113 4114 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4115 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4116 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4117 4118 Notes: 4119 If the *_nnz parameter is given then the *_nz parameter is ignored 4120 4121 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4122 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4123 storage requirements for this matrix. 4124 4125 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4126 processor than it must be used on all processors that share the object for 4127 that argument. 4128 4129 The user MUST specify either the local or global matrix dimensions 4130 (possibly both). 4131 4132 The parallel matrix is partitioned across processors such that the 4133 first m0 rows belong to process 0, the next m1 rows belong to 4134 process 1, the next m2 rows belong to process 2 etc.. where 4135 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4136 values corresponding to [m x N] submatrix. 4137 4138 The columns are logically partitioned with the n0 columns belonging 4139 to 0th partition, the next n1 columns belonging to the next 4140 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4141 4142 The DIAGONAL portion of the local submatrix on any given processor 4143 is the submatrix corresponding to the rows and columns m,n 4144 corresponding to the given processor. i.e diagonal matrix on 4145 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4146 etc. The remaining portion of the local submatrix [m x (N-n)] 4147 constitute the OFF-DIAGONAL portion. The example below better 4148 illustrates this concept. 4149 4150 For a square global matrix we define each processor's diagonal portion 4151 to be its local rows and the corresponding columns (a square submatrix); 4152 each processor's off-diagonal portion encompasses the remainder of the 4153 local matrix (a rectangular submatrix). 4154 4155 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4156 4157 When calling this routine with a single process communicator, a matrix of 4158 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4159 type of communicator, use the construction mechanism: 4160 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4161 4162 By default, this format uses inodes (identical nodes) when possible. 4163 We search for consecutive rows with the same nonzero structure, thereby 4164 reusing matrix information to achieve increased efficiency. 4165 4166 Options Database Keys: 4167 + -mat_no_inode - Do not use inodes 4168 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4169 - -mat_aij_oneindex - Internally use indexing starting at 1 4170 rather than 0. Note that when calling MatSetValues(), 4171 the user still MUST index entries starting at 0! 4172 4173 4174 Example usage: 4175 4176 Consider the following 8x8 matrix with 34 non-zero values, that is 4177 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4178 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4179 as follows: 4180 4181 .vb 4182 1 2 0 | 0 3 0 | 0 4 4183 Proc0 0 5 6 | 7 0 0 | 8 0 4184 9 0 10 | 11 0 0 | 12 0 4185 ------------------------------------- 4186 13 0 14 | 15 16 17 | 0 0 4187 Proc1 0 18 0 | 19 20 21 | 0 0 4188 0 0 0 | 22 23 0 | 24 0 4189 ------------------------------------- 4190 Proc2 25 26 27 | 0 0 28 | 29 0 4191 30 0 0 | 31 32 33 | 0 34 4192 .ve 4193 4194 This can be represented as a collection of submatrices as: 4195 4196 .vb 4197 A B C 4198 D E F 4199 G H I 4200 .ve 4201 4202 Where the submatrices A,B,C are owned by proc0, D,E,F are 4203 owned by proc1, G,H,I are owned by proc2. 4204 4205 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4206 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4207 The 'M','N' parameters are 8,8, and have the same values on all procs. 4208 4209 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4210 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4211 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4212 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4213 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4214 matrix, ans [DF] as another SeqAIJ matrix. 4215 4216 When d_nz, o_nz parameters are specified, d_nz storage elements are 4217 allocated for every row of the local diagonal submatrix, and o_nz 4218 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4219 One way to choose d_nz and o_nz is to use the max nonzerors per local 4220 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4221 In this case, the values of d_nz,o_nz are: 4222 .vb 4223 proc0 : dnz = 2, o_nz = 2 4224 proc1 : dnz = 3, o_nz = 2 4225 proc2 : dnz = 1, o_nz = 4 4226 .ve 4227 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4228 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4229 for proc3. i.e we are using 12+15+10=37 storage locations to store 4230 34 values. 4231 4232 When d_nnz, o_nnz parameters are specified, the storage is specified 4233 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4234 In the above case the values for d_nnz,o_nnz are: 4235 .vb 4236 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4237 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4238 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4239 .ve 4240 Here the space allocated is sum of all the above values i.e 34, and 4241 hence pre-allocation is perfect. 4242 4243 Level: intermediate 4244 4245 .keywords: matrix, aij, compressed row, sparse, parallel 4246 4247 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4248 MPIAIJ, MatCreateMPIAIJWithArrays() 4249 @*/ 4250 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4251 { 4252 PetscErrorCode ierr; 4253 PetscMPIInt size; 4254 4255 PetscFunctionBegin; 4256 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4257 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4258 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4259 if (size > 1) { 4260 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4261 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4262 } else { 4263 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4264 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4265 } 4266 PetscFunctionReturn(0); 4267 } 4268 4269 #undef __FUNCT__ 4270 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4271 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4272 { 4273 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4274 4275 PetscFunctionBegin; 4276 *Ad = a->A; 4277 *Ao = a->B; 4278 *colmap = a->garray; 4279 PetscFunctionReturn(0); 4280 } 4281 4282 #undef __FUNCT__ 4283 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4284 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4285 { 4286 PetscErrorCode ierr; 4287 PetscInt i; 4288 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4289 4290 PetscFunctionBegin; 4291 if (coloring->ctype == IS_COLORING_GLOBAL) { 4292 ISColoringValue *allcolors,*colors; 4293 ISColoring ocoloring; 4294 4295 /* set coloring for diagonal portion */ 4296 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4297 4298 /* set coloring for off-diagonal portion */ 4299 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4300 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4301 for (i=0; i<a->B->cmap->n; i++) { 4302 colors[i] = allcolors[a->garray[i]]; 4303 } 4304 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4305 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4306 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4307 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4308 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4309 ISColoringValue *colors; 4310 PetscInt *larray; 4311 ISColoring ocoloring; 4312 4313 /* set coloring for diagonal portion */ 4314 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4315 for (i=0; i<a->A->cmap->n; i++) { 4316 larray[i] = i + A->cmap->rstart; 4317 } 4318 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4319 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4320 for (i=0; i<a->A->cmap->n; i++) { 4321 colors[i] = coloring->colors[larray[i]]; 4322 } 4323 ierr = PetscFree(larray);CHKERRQ(ierr); 4324 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4325 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4326 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4327 4328 /* set coloring for off-diagonal portion */ 4329 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4330 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4331 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4332 for (i=0; i<a->B->cmap->n; i++) { 4333 colors[i] = coloring->colors[larray[i]]; 4334 } 4335 ierr = PetscFree(larray);CHKERRQ(ierr); 4336 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4337 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4338 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4339 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4340 PetscFunctionReturn(0); 4341 } 4342 4343 #undef __FUNCT__ 4344 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4345 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4346 { 4347 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4348 PetscErrorCode ierr; 4349 4350 PetscFunctionBegin; 4351 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4352 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4353 PetscFunctionReturn(0); 4354 } 4355 4356 #undef __FUNCT__ 4357 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4358 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4359 { 4360 PetscErrorCode ierr; 4361 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4362 PetscInt *indx; 4363 4364 PetscFunctionBegin; 4365 /* This routine will ONLY return MPIAIJ type matrix */ 4366 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4367 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4368 if (n == PETSC_DECIDE) { 4369 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4370 } 4371 /* Check sum(n) = N */ 4372 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4373 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4374 4375 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4376 rstart -= m; 4377 4378 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4379 for (i=0; i<m; i++) { 4380 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4381 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4382 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4383 } 4384 4385 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4386 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4387 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4388 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4389 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4390 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4391 PetscFunctionReturn(0); 4392 } 4393 4394 #undef __FUNCT__ 4395 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4396 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4397 { 4398 PetscErrorCode ierr; 4399 PetscInt m,N,i,rstart,nnz,Ii; 4400 PetscInt *indx; 4401 PetscScalar *values; 4402 4403 PetscFunctionBegin; 4404 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4405 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4406 for (i=0; i<m; i++) { 4407 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4408 Ii = i + rstart; 4409 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4410 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4411 } 4412 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4413 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4414 PetscFunctionReturn(0); 4415 } 4416 4417 #undef __FUNCT__ 4418 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4419 /*@ 4420 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4421 matrices from each processor 4422 4423 Collective on MPI_Comm 4424 4425 Input Parameters: 4426 + comm - the communicators the parallel matrix will live on 4427 . inmat - the input sequential matrices 4428 . n - number of local columns (or PETSC_DECIDE) 4429 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4430 4431 Output Parameter: 4432 . outmat - the parallel matrix generated 4433 4434 Level: advanced 4435 4436 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4437 4438 @*/ 4439 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4440 { 4441 PetscErrorCode ierr; 4442 PetscMPIInt size; 4443 4444 PetscFunctionBegin; 4445 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4446 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4447 if (size == 1) { 4448 if (scall == MAT_INITIAL_MATRIX) { 4449 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4450 } else { 4451 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4452 } 4453 } else { 4454 if (scall == MAT_INITIAL_MATRIX) { 4455 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4456 } 4457 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4458 } 4459 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4460 PetscFunctionReturn(0); 4461 } 4462 4463 #undef __FUNCT__ 4464 #define __FUNCT__ "MatFileSplit" 4465 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4466 { 4467 PetscErrorCode ierr; 4468 PetscMPIInt rank; 4469 PetscInt m,N,i,rstart,nnz; 4470 size_t len; 4471 const PetscInt *indx; 4472 PetscViewer out; 4473 char *name; 4474 Mat B; 4475 const PetscScalar *values; 4476 4477 PetscFunctionBegin; 4478 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4479 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4480 /* Should this be the type of the diagonal block of A? */ 4481 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4482 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4483 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4484 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4485 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4486 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4487 for (i=0; i<m; i++) { 4488 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4489 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4490 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4491 } 4492 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4493 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4494 4495 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4496 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4497 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4498 sprintf(name,"%s.%d",outfile,rank); 4499 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4500 ierr = PetscFree(name);CHKERRQ(ierr); 4501 ierr = MatView(B,out);CHKERRQ(ierr); 4502 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4503 ierr = MatDestroy(&B);CHKERRQ(ierr); 4504 PetscFunctionReturn(0); 4505 } 4506 4507 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4508 #undef __FUNCT__ 4509 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4510 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4511 { 4512 PetscErrorCode ierr; 4513 Mat_Merge_SeqsToMPI *merge; 4514 PetscContainer container; 4515 4516 PetscFunctionBegin; 4517 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4518 if (container) { 4519 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4520 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4521 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4522 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4523 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4524 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4525 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4526 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4527 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4528 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4529 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4530 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4531 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4532 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4533 ierr = PetscFree(merge);CHKERRQ(ierr); 4534 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4535 } 4536 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4537 PetscFunctionReturn(0); 4538 } 4539 4540 #include <../src/mat/utils/freespace.h> 4541 #include <petscbt.h> 4542 4543 #undef __FUNCT__ 4544 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4545 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4546 { 4547 PetscErrorCode ierr; 4548 MPI_Comm comm; 4549 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4550 PetscMPIInt size,rank,taga,*len_s; 4551 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4552 PetscInt proc,m; 4553 PetscInt **buf_ri,**buf_rj; 4554 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4555 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4556 MPI_Request *s_waits,*r_waits; 4557 MPI_Status *status; 4558 MatScalar *aa=a->a; 4559 MatScalar **abuf_r,*ba_i; 4560 Mat_Merge_SeqsToMPI *merge; 4561 PetscContainer container; 4562 4563 PetscFunctionBegin; 4564 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4565 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4566 4567 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4568 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4569 4570 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4571 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4572 4573 bi = merge->bi; 4574 bj = merge->bj; 4575 buf_ri = merge->buf_ri; 4576 buf_rj = merge->buf_rj; 4577 4578 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4579 owners = merge->rowmap->range; 4580 len_s = merge->len_s; 4581 4582 /* send and recv matrix values */ 4583 /*-----------------------------*/ 4584 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4585 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4586 4587 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4588 for (proc=0,k=0; proc<size; proc++) { 4589 if (!len_s[proc]) continue; 4590 i = owners[proc]; 4591 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4592 k++; 4593 } 4594 4595 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4596 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4597 ierr = PetscFree(status);CHKERRQ(ierr); 4598 4599 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4600 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4601 4602 /* insert mat values of mpimat */ 4603 /*----------------------------*/ 4604 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4605 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4606 4607 for (k=0; k<merge->nrecv; k++) { 4608 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4609 nrows = *(buf_ri_k[k]); 4610 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4611 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4612 } 4613 4614 /* set values of ba */ 4615 m = merge->rowmap->n; 4616 for (i=0; i<m; i++) { 4617 arow = owners[rank] + i; 4618 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4619 bnzi = bi[i+1] - bi[i]; 4620 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4621 4622 /* add local non-zero vals of this proc's seqmat into ba */ 4623 anzi = ai[arow+1] - ai[arow]; 4624 aj = a->j + ai[arow]; 4625 aa = a->a + ai[arow]; 4626 nextaj = 0; 4627 for (j=0; nextaj<anzi; j++) { 4628 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4629 ba_i[j] += aa[nextaj++]; 4630 } 4631 } 4632 4633 /* add received vals into ba */ 4634 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4635 /* i-th row */ 4636 if (i == *nextrow[k]) { 4637 anzi = *(nextai[k]+1) - *nextai[k]; 4638 aj = buf_rj[k] + *(nextai[k]); 4639 aa = abuf_r[k] + *(nextai[k]); 4640 nextaj = 0; 4641 for (j=0; nextaj<anzi; j++) { 4642 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4643 ba_i[j] += aa[nextaj++]; 4644 } 4645 } 4646 nextrow[k]++; nextai[k]++; 4647 } 4648 } 4649 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4650 } 4651 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4652 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4653 4654 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4655 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4656 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4657 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4658 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4659 PetscFunctionReturn(0); 4660 } 4661 4662 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4663 4664 #undef __FUNCT__ 4665 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4666 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4667 { 4668 PetscErrorCode ierr; 4669 Mat B_mpi; 4670 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4671 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4672 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4673 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4674 PetscInt len,proc,*dnz,*onz,bs,cbs; 4675 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4676 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4677 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4678 MPI_Status *status; 4679 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4680 PetscBT lnkbt; 4681 Mat_Merge_SeqsToMPI *merge; 4682 PetscContainer container; 4683 4684 PetscFunctionBegin; 4685 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4686 4687 /* make sure it is a PETSc comm */ 4688 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4689 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4690 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4691 4692 ierr = PetscNew(&merge);CHKERRQ(ierr); 4693 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4694 4695 /* determine row ownership */ 4696 /*---------------------------------------------------------*/ 4697 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4698 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4699 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4700 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4701 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4702 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4703 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4704 4705 m = merge->rowmap->n; 4706 owners = merge->rowmap->range; 4707 4708 /* determine the number of messages to send, their lengths */ 4709 /*---------------------------------------------------------*/ 4710 len_s = merge->len_s; 4711 4712 len = 0; /* length of buf_si[] */ 4713 merge->nsend = 0; 4714 for (proc=0; proc<size; proc++) { 4715 len_si[proc] = 0; 4716 if (proc == rank) { 4717 len_s[proc] = 0; 4718 } else { 4719 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4720 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4721 } 4722 if (len_s[proc]) { 4723 merge->nsend++; 4724 nrows = 0; 4725 for (i=owners[proc]; i<owners[proc+1]; i++) { 4726 if (ai[i+1] > ai[i]) nrows++; 4727 } 4728 len_si[proc] = 2*(nrows+1); 4729 len += len_si[proc]; 4730 } 4731 } 4732 4733 /* determine the number and length of messages to receive for ij-structure */ 4734 /*-------------------------------------------------------------------------*/ 4735 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4736 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4737 4738 /* post the Irecv of j-structure */ 4739 /*-------------------------------*/ 4740 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4741 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4742 4743 /* post the Isend of j-structure */ 4744 /*--------------------------------*/ 4745 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4746 4747 for (proc=0, k=0; proc<size; proc++) { 4748 if (!len_s[proc]) continue; 4749 i = owners[proc]; 4750 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4751 k++; 4752 } 4753 4754 /* receives and sends of j-structure are complete */ 4755 /*------------------------------------------------*/ 4756 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4757 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4758 4759 /* send and recv i-structure */ 4760 /*---------------------------*/ 4761 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4762 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4763 4764 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4765 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4766 for (proc=0,k=0; proc<size; proc++) { 4767 if (!len_s[proc]) continue; 4768 /* form outgoing message for i-structure: 4769 buf_si[0]: nrows to be sent 4770 [1:nrows]: row index (global) 4771 [nrows+1:2*nrows+1]: i-structure index 4772 */ 4773 /*-------------------------------------------*/ 4774 nrows = len_si[proc]/2 - 1; 4775 buf_si_i = buf_si + nrows+1; 4776 buf_si[0] = nrows; 4777 buf_si_i[0] = 0; 4778 nrows = 0; 4779 for (i=owners[proc]; i<owners[proc+1]; i++) { 4780 anzi = ai[i+1] - ai[i]; 4781 if (anzi) { 4782 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4783 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4784 nrows++; 4785 } 4786 } 4787 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4788 k++; 4789 buf_si += len_si[proc]; 4790 } 4791 4792 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4793 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4794 4795 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4796 for (i=0; i<merge->nrecv; i++) { 4797 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4798 } 4799 4800 ierr = PetscFree(len_si);CHKERRQ(ierr); 4801 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4802 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4803 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4804 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4805 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4806 ierr = PetscFree(status);CHKERRQ(ierr); 4807 4808 /* compute a local seq matrix in each processor */ 4809 /*----------------------------------------------*/ 4810 /* allocate bi array and free space for accumulating nonzero column info */ 4811 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4812 bi[0] = 0; 4813 4814 /* create and initialize a linked list */ 4815 nlnk = N+1; 4816 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4817 4818 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4819 len = ai[owners[rank+1]] - ai[owners[rank]]; 4820 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4821 4822 current_space = free_space; 4823 4824 /* determine symbolic info for each local row */ 4825 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4826 4827 for (k=0; k<merge->nrecv; k++) { 4828 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4829 nrows = *buf_ri_k[k]; 4830 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4831 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4832 } 4833 4834 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4835 len = 0; 4836 for (i=0; i<m; i++) { 4837 bnzi = 0; 4838 /* add local non-zero cols of this proc's seqmat into lnk */ 4839 arow = owners[rank] + i; 4840 anzi = ai[arow+1] - ai[arow]; 4841 aj = a->j + ai[arow]; 4842 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4843 bnzi += nlnk; 4844 /* add received col data into lnk */ 4845 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4846 if (i == *nextrow[k]) { /* i-th row */ 4847 anzi = *(nextai[k]+1) - *nextai[k]; 4848 aj = buf_rj[k] + *nextai[k]; 4849 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4850 bnzi += nlnk; 4851 nextrow[k]++; nextai[k]++; 4852 } 4853 } 4854 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4855 4856 /* if free space is not available, make more free space */ 4857 if (current_space->local_remaining<bnzi) { 4858 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4859 nspacedouble++; 4860 } 4861 /* copy data into free space, then initialize lnk */ 4862 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4863 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4864 4865 current_space->array += bnzi; 4866 current_space->local_used += bnzi; 4867 current_space->local_remaining -= bnzi; 4868 4869 bi[i+1] = bi[i] + bnzi; 4870 } 4871 4872 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4873 4874 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4875 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4876 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4877 4878 /* create symbolic parallel matrix B_mpi */ 4879 /*---------------------------------------*/ 4880 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4881 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4882 if (n==PETSC_DECIDE) { 4883 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4884 } else { 4885 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4886 } 4887 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4888 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4889 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4890 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4891 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4892 4893 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4894 B_mpi->assembled = PETSC_FALSE; 4895 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4896 merge->bi = bi; 4897 merge->bj = bj; 4898 merge->buf_ri = buf_ri; 4899 merge->buf_rj = buf_rj; 4900 merge->coi = NULL; 4901 merge->coj = NULL; 4902 merge->owners_co = NULL; 4903 4904 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4905 4906 /* attach the supporting struct to B_mpi for reuse */ 4907 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4908 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4909 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4910 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4911 *mpimat = B_mpi; 4912 4913 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4914 PetscFunctionReturn(0); 4915 } 4916 4917 #undef __FUNCT__ 4918 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4919 /*@C 4920 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4921 matrices from each processor 4922 4923 Collective on MPI_Comm 4924 4925 Input Parameters: 4926 + comm - the communicators the parallel matrix will live on 4927 . seqmat - the input sequential matrices 4928 . m - number of local rows (or PETSC_DECIDE) 4929 . n - number of local columns (or PETSC_DECIDE) 4930 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4931 4932 Output Parameter: 4933 . mpimat - the parallel matrix generated 4934 4935 Level: advanced 4936 4937 Notes: 4938 The dimensions of the sequential matrix in each processor MUST be the same. 4939 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4940 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4941 @*/ 4942 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4943 { 4944 PetscErrorCode ierr; 4945 PetscMPIInt size; 4946 4947 PetscFunctionBegin; 4948 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4949 if (size == 1) { 4950 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4951 if (scall == MAT_INITIAL_MATRIX) { 4952 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4953 } else { 4954 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4955 } 4956 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4957 PetscFunctionReturn(0); 4958 } 4959 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4960 if (scall == MAT_INITIAL_MATRIX) { 4961 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4962 } 4963 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4964 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4965 PetscFunctionReturn(0); 4966 } 4967 4968 #undef __FUNCT__ 4969 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4970 /*@ 4971 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4972 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4973 with MatGetSize() 4974 4975 Not Collective 4976 4977 Input Parameters: 4978 + A - the matrix 4979 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4980 4981 Output Parameter: 4982 . A_loc - the local sequential matrix generated 4983 4984 Level: developer 4985 4986 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4987 4988 @*/ 4989 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4990 { 4991 PetscErrorCode ierr; 4992 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4993 Mat_SeqAIJ *mat,*a,*b; 4994 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4995 MatScalar *aa,*ba,*cam; 4996 PetscScalar *ca; 4997 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4998 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4999 PetscBool match; 5000 5001 PetscFunctionBegin; 5002 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5003 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5004 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5005 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5006 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5007 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5008 aa = a->a; ba = b->a; 5009 if (scall == MAT_INITIAL_MATRIX) { 5010 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 5011 ci[0] = 0; 5012 for (i=0; i<am; i++) { 5013 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5014 } 5015 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 5016 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 5017 k = 0; 5018 for (i=0; i<am; i++) { 5019 ncols_o = bi[i+1] - bi[i]; 5020 ncols_d = ai[i+1] - ai[i]; 5021 /* off-diagonal portion of A */ 5022 for (jo=0; jo<ncols_o; jo++) { 5023 col = cmap[*bj]; 5024 if (col >= cstart) break; 5025 cj[k] = col; bj++; 5026 ca[k++] = *ba++; 5027 } 5028 /* diagonal portion of A */ 5029 for (j=0; j<ncols_d; j++) { 5030 cj[k] = cstart + *aj++; 5031 ca[k++] = *aa++; 5032 } 5033 /* off-diagonal portion of A */ 5034 for (j=jo; j<ncols_o; j++) { 5035 cj[k] = cmap[*bj++]; 5036 ca[k++] = *ba++; 5037 } 5038 } 5039 /* put together the new matrix */ 5040 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5041 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5042 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5043 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5044 mat->free_a = PETSC_TRUE; 5045 mat->free_ij = PETSC_TRUE; 5046 mat->nonew = 0; 5047 } else if (scall == MAT_REUSE_MATRIX) { 5048 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5049 ci = mat->i; cj = mat->j; cam = mat->a; 5050 for (i=0; i<am; i++) { 5051 /* off-diagonal portion of A */ 5052 ncols_o = bi[i+1] - bi[i]; 5053 for (jo=0; jo<ncols_o; jo++) { 5054 col = cmap[*bj]; 5055 if (col >= cstart) break; 5056 *cam++ = *ba++; bj++; 5057 } 5058 /* diagonal portion of A */ 5059 ncols_d = ai[i+1] - ai[i]; 5060 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5061 /* off-diagonal portion of A */ 5062 for (j=jo; j<ncols_o; j++) { 5063 *cam++ = *ba++; bj++; 5064 } 5065 } 5066 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5067 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5068 PetscFunctionReturn(0); 5069 } 5070 5071 #undef __FUNCT__ 5072 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5073 /*@C 5074 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5075 5076 Not Collective 5077 5078 Input Parameters: 5079 + A - the matrix 5080 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5081 - row, col - index sets of rows and columns to extract (or NULL) 5082 5083 Output Parameter: 5084 . A_loc - the local sequential matrix generated 5085 5086 Level: developer 5087 5088 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5089 5090 @*/ 5091 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5092 { 5093 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5094 PetscErrorCode ierr; 5095 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5096 IS isrowa,iscola; 5097 Mat *aloc; 5098 PetscBool match; 5099 5100 PetscFunctionBegin; 5101 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5102 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5103 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5104 if (!row) { 5105 start = A->rmap->rstart; end = A->rmap->rend; 5106 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5107 } else { 5108 isrowa = *row; 5109 } 5110 if (!col) { 5111 start = A->cmap->rstart; 5112 cmap = a->garray; 5113 nzA = a->A->cmap->n; 5114 nzB = a->B->cmap->n; 5115 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5116 ncols = 0; 5117 for (i=0; i<nzB; i++) { 5118 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5119 else break; 5120 } 5121 imark = i; 5122 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5123 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5124 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5125 } else { 5126 iscola = *col; 5127 } 5128 if (scall != MAT_INITIAL_MATRIX) { 5129 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5130 aloc[0] = *A_loc; 5131 } 5132 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5133 *A_loc = aloc[0]; 5134 ierr = PetscFree(aloc);CHKERRQ(ierr); 5135 if (!row) { 5136 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5137 } 5138 if (!col) { 5139 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5140 } 5141 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5142 PetscFunctionReturn(0); 5143 } 5144 5145 #undef __FUNCT__ 5146 #define __FUNCT__ "MatGetBrowsOfAcols" 5147 /*@C 5148 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5149 5150 Collective on Mat 5151 5152 Input Parameters: 5153 + A,B - the matrices in mpiaij format 5154 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5155 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5156 5157 Output Parameter: 5158 + rowb, colb - index sets of rows and columns of B to extract 5159 - B_seq - the sequential matrix generated 5160 5161 Level: developer 5162 5163 @*/ 5164 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5165 { 5166 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5167 PetscErrorCode ierr; 5168 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5169 IS isrowb,iscolb; 5170 Mat *bseq=NULL; 5171 5172 PetscFunctionBegin; 5173 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5174 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5175 } 5176 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5177 5178 if (scall == MAT_INITIAL_MATRIX) { 5179 start = A->cmap->rstart; 5180 cmap = a->garray; 5181 nzA = a->A->cmap->n; 5182 nzB = a->B->cmap->n; 5183 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5184 ncols = 0; 5185 for (i=0; i<nzB; i++) { /* row < local row index */ 5186 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5187 else break; 5188 } 5189 imark = i; 5190 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5191 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5192 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5193 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5194 } else { 5195 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5196 isrowb = *rowb; iscolb = *colb; 5197 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5198 bseq[0] = *B_seq; 5199 } 5200 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5201 *B_seq = bseq[0]; 5202 ierr = PetscFree(bseq);CHKERRQ(ierr); 5203 if (!rowb) { 5204 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5205 } else { 5206 *rowb = isrowb; 5207 } 5208 if (!colb) { 5209 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5210 } else { 5211 *colb = iscolb; 5212 } 5213 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5214 PetscFunctionReturn(0); 5215 } 5216 5217 #undef __FUNCT__ 5218 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5219 /* 5220 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5221 of the OFF-DIAGONAL portion of local A 5222 5223 Collective on Mat 5224 5225 Input Parameters: 5226 + A,B - the matrices in mpiaij format 5227 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5228 5229 Output Parameter: 5230 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5231 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5232 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5233 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5234 5235 Level: developer 5236 5237 */ 5238 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5239 { 5240 VecScatter_MPI_General *gen_to,*gen_from; 5241 PetscErrorCode ierr; 5242 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5243 Mat_SeqAIJ *b_oth; 5244 VecScatter ctx =a->Mvctx; 5245 MPI_Comm comm; 5246 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5247 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5248 PetscScalar *rvalues,*svalues; 5249 MatScalar *b_otha,*bufa,*bufA; 5250 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5251 MPI_Request *rwaits = NULL,*swaits = NULL; 5252 MPI_Status *sstatus,rstatus; 5253 PetscMPIInt jj; 5254 PetscInt *cols,sbs,rbs; 5255 PetscScalar *vals; 5256 5257 PetscFunctionBegin; 5258 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5259 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5260 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5261 } 5262 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5263 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5264 5265 gen_to = (VecScatter_MPI_General*)ctx->todata; 5266 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5267 rvalues = gen_from->values; /* holds the length of receiving row */ 5268 svalues = gen_to->values; /* holds the length of sending row */ 5269 nrecvs = gen_from->n; 5270 nsends = gen_to->n; 5271 5272 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5273 srow = gen_to->indices; /* local row index to be sent */ 5274 sstarts = gen_to->starts; 5275 sprocs = gen_to->procs; 5276 sstatus = gen_to->sstatus; 5277 sbs = gen_to->bs; 5278 rstarts = gen_from->starts; 5279 rprocs = gen_from->procs; 5280 rbs = gen_from->bs; 5281 5282 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5283 if (scall == MAT_INITIAL_MATRIX) { 5284 /* i-array */ 5285 /*---------*/ 5286 /* post receives */ 5287 for (i=0; i<nrecvs; i++) { 5288 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5289 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5290 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5291 } 5292 5293 /* pack the outgoing message */ 5294 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5295 5296 sstartsj[0] = 0; 5297 rstartsj[0] = 0; 5298 len = 0; /* total length of j or a array to be sent */ 5299 k = 0; 5300 for (i=0; i<nsends; i++) { 5301 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5302 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5303 for (j=0; j<nrows; j++) { 5304 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5305 for (l=0; l<sbs; l++) { 5306 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5307 5308 rowlen[j*sbs+l] = ncols; 5309 5310 len += ncols; 5311 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5312 } 5313 k++; 5314 } 5315 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5316 5317 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5318 } 5319 /* recvs and sends of i-array are completed */ 5320 i = nrecvs; 5321 while (i--) { 5322 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5323 } 5324 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5325 5326 /* allocate buffers for sending j and a arrays */ 5327 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5328 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5329 5330 /* create i-array of B_oth */ 5331 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5332 5333 b_othi[0] = 0; 5334 len = 0; /* total length of j or a array to be received */ 5335 k = 0; 5336 for (i=0; i<nrecvs; i++) { 5337 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5338 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5339 for (j=0; j<nrows; j++) { 5340 b_othi[k+1] = b_othi[k] + rowlen[j]; 5341 len += rowlen[j]; k++; 5342 } 5343 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5344 } 5345 5346 /* allocate space for j and a arrrays of B_oth */ 5347 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5348 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5349 5350 /* j-array */ 5351 /*---------*/ 5352 /* post receives of j-array */ 5353 for (i=0; i<nrecvs; i++) { 5354 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5355 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5356 } 5357 5358 /* pack the outgoing message j-array */ 5359 k = 0; 5360 for (i=0; i<nsends; i++) { 5361 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5362 bufJ = bufj+sstartsj[i]; 5363 for (j=0; j<nrows; j++) { 5364 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5365 for (ll=0; ll<sbs; ll++) { 5366 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5367 for (l=0; l<ncols; l++) { 5368 *bufJ++ = cols[l]; 5369 } 5370 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5371 } 5372 } 5373 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5374 } 5375 5376 /* recvs and sends of j-array are completed */ 5377 i = nrecvs; 5378 while (i--) { 5379 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5380 } 5381 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5382 } else if (scall == MAT_REUSE_MATRIX) { 5383 sstartsj = *startsj_s; 5384 rstartsj = *startsj_r; 5385 bufa = *bufa_ptr; 5386 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5387 b_otha = b_oth->a; 5388 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5389 5390 /* a-array */ 5391 /*---------*/ 5392 /* post receives of a-array */ 5393 for (i=0; i<nrecvs; i++) { 5394 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5395 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5396 } 5397 5398 /* pack the outgoing message a-array */ 5399 k = 0; 5400 for (i=0; i<nsends; i++) { 5401 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5402 bufA = bufa+sstartsj[i]; 5403 for (j=0; j<nrows; j++) { 5404 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5405 for (ll=0; ll<sbs; ll++) { 5406 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5407 for (l=0; l<ncols; l++) { 5408 *bufA++ = vals[l]; 5409 } 5410 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5411 } 5412 } 5413 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5414 } 5415 /* recvs and sends of a-array are completed */ 5416 i = nrecvs; 5417 while (i--) { 5418 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5419 } 5420 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5421 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5422 5423 if (scall == MAT_INITIAL_MATRIX) { 5424 /* put together the new matrix */ 5425 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5426 5427 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5428 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5429 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5430 b_oth->free_a = PETSC_TRUE; 5431 b_oth->free_ij = PETSC_TRUE; 5432 b_oth->nonew = 0; 5433 5434 ierr = PetscFree(bufj);CHKERRQ(ierr); 5435 if (!startsj_s || !bufa_ptr) { 5436 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5437 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5438 } else { 5439 *startsj_s = sstartsj; 5440 *startsj_r = rstartsj; 5441 *bufa_ptr = bufa; 5442 } 5443 } 5444 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5445 PetscFunctionReturn(0); 5446 } 5447 5448 #undef __FUNCT__ 5449 #define __FUNCT__ "MatGetCommunicationStructs" 5450 /*@C 5451 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5452 5453 Not Collective 5454 5455 Input Parameters: 5456 . A - The matrix in mpiaij format 5457 5458 Output Parameter: 5459 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5460 . colmap - A map from global column index to local index into lvec 5461 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5462 5463 Level: developer 5464 5465 @*/ 5466 #if defined(PETSC_USE_CTABLE) 5467 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5468 #else 5469 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5470 #endif 5471 { 5472 Mat_MPIAIJ *a; 5473 5474 PetscFunctionBegin; 5475 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5476 PetscValidPointer(lvec, 2); 5477 PetscValidPointer(colmap, 3); 5478 PetscValidPointer(multScatter, 4); 5479 a = (Mat_MPIAIJ*) A->data; 5480 if (lvec) *lvec = a->lvec; 5481 if (colmap) *colmap = a->colmap; 5482 if (multScatter) *multScatter = a->Mvctx; 5483 PetscFunctionReturn(0); 5484 } 5485 5486 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5487 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5488 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5489 5490 #undef __FUNCT__ 5491 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5492 /* 5493 Computes (B'*A')' since computing B*A directly is untenable 5494 5495 n p p 5496 ( ) ( ) ( ) 5497 m ( A ) * n ( B ) = m ( C ) 5498 ( ) ( ) ( ) 5499 5500 */ 5501 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5502 { 5503 PetscErrorCode ierr; 5504 Mat At,Bt,Ct; 5505 5506 PetscFunctionBegin; 5507 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5508 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5509 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5510 ierr = MatDestroy(&At);CHKERRQ(ierr); 5511 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5512 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5513 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5514 PetscFunctionReturn(0); 5515 } 5516 5517 #undef __FUNCT__ 5518 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5519 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5520 { 5521 PetscErrorCode ierr; 5522 PetscInt m=A->rmap->n,n=B->cmap->n; 5523 Mat Cmat; 5524 5525 PetscFunctionBegin; 5526 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5527 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5528 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5529 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5530 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5531 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5532 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5533 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5534 5535 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5536 5537 *C = Cmat; 5538 PetscFunctionReturn(0); 5539 } 5540 5541 /* ----------------------------------------------------------------*/ 5542 #undef __FUNCT__ 5543 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5544 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5545 { 5546 PetscErrorCode ierr; 5547 5548 PetscFunctionBegin; 5549 if (scall == MAT_INITIAL_MATRIX) { 5550 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5551 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5552 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5553 } 5554 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5555 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5556 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5557 PetscFunctionReturn(0); 5558 } 5559 5560 #if defined(PETSC_HAVE_MUMPS) 5561 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5562 #endif 5563 #if defined(PETSC_HAVE_PASTIX) 5564 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5565 #endif 5566 #if defined(PETSC_HAVE_SUPERLU_DIST) 5567 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5568 #endif 5569 #if defined(PETSC_HAVE_CLIQUE) 5570 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5571 #endif 5572 5573 /*MC 5574 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5575 5576 Options Database Keys: 5577 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5578 5579 Level: beginner 5580 5581 .seealso: MatCreateAIJ() 5582 M*/ 5583 5584 #undef __FUNCT__ 5585 #define __FUNCT__ "MatCreate_MPIAIJ" 5586 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5587 { 5588 Mat_MPIAIJ *b; 5589 PetscErrorCode ierr; 5590 PetscMPIInt size; 5591 5592 PetscFunctionBegin; 5593 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5594 5595 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5596 B->data = (void*)b; 5597 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5598 B->assembled = PETSC_FALSE; 5599 B->insertmode = NOT_SET_VALUES; 5600 b->size = size; 5601 5602 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5603 5604 /* build cache for off array entries formed */ 5605 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5606 5607 b->donotstash = PETSC_FALSE; 5608 b->colmap = 0; 5609 b->garray = 0; 5610 b->roworiented = PETSC_TRUE; 5611 5612 /* stuff used for matrix vector multiply */ 5613 b->lvec = NULL; 5614 b->Mvctx = NULL; 5615 5616 /* stuff for MatGetRow() */ 5617 b->rowindices = 0; 5618 b->rowvalues = 0; 5619 b->getrowactive = PETSC_FALSE; 5620 5621 /* flexible pointer used in CUSP/CUSPARSE classes */ 5622 b->spptr = NULL; 5623 5624 #if defined(PETSC_HAVE_MUMPS) 5625 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5626 #endif 5627 #if defined(PETSC_HAVE_PASTIX) 5628 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5629 #endif 5630 #if defined(PETSC_HAVE_SUPERLU_DIST) 5631 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5632 #endif 5633 #if defined(PETSC_HAVE_CLIQUE) 5634 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5635 #endif 5636 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5637 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5638 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5639 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5640 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5641 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5642 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5643 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5644 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5645 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5646 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5647 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5648 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5649 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5650 PetscFunctionReturn(0); 5651 } 5652 5653 #undef __FUNCT__ 5654 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5655 /*@ 5656 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5657 and "off-diagonal" part of the matrix in CSR format. 5658 5659 Collective on MPI_Comm 5660 5661 Input Parameters: 5662 + comm - MPI communicator 5663 . m - number of local rows (Cannot be PETSC_DECIDE) 5664 . n - This value should be the same as the local size used in creating the 5665 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5666 calculated if N is given) For square matrices n is almost always m. 5667 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5668 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5669 . i - row indices for "diagonal" portion of matrix 5670 . j - column indices 5671 . a - matrix values 5672 . oi - row indices for "off-diagonal" portion of matrix 5673 . oj - column indices 5674 - oa - matrix values 5675 5676 Output Parameter: 5677 . mat - the matrix 5678 5679 Level: advanced 5680 5681 Notes: 5682 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5683 must free the arrays once the matrix has been destroyed and not before. 5684 5685 The i and j indices are 0 based 5686 5687 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5688 5689 This sets local rows and cannot be used to set off-processor values. 5690 5691 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5692 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5693 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5694 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5695 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5696 communication if it is known that only local entries will be set. 5697 5698 .keywords: matrix, aij, compressed row, sparse, parallel 5699 5700 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5701 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5702 @*/ 5703 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5704 { 5705 PetscErrorCode ierr; 5706 Mat_MPIAIJ *maij; 5707 5708 PetscFunctionBegin; 5709 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5710 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5711 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5712 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5713 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5714 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5715 maij = (Mat_MPIAIJ*) (*mat)->data; 5716 5717 (*mat)->preallocated = PETSC_TRUE; 5718 5719 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5720 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5721 5722 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5723 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5724 5725 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5726 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5727 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5728 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5729 5730 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5731 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5732 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5733 PetscFunctionReturn(0); 5734 } 5735 5736 /* 5737 Special version for direct calls from Fortran 5738 */ 5739 #include <petsc-private/fortranimpl.h> 5740 5741 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5742 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5743 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5744 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5745 #endif 5746 5747 /* Change these macros so can be used in void function */ 5748 #undef CHKERRQ 5749 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5750 #undef SETERRQ2 5751 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5752 #undef SETERRQ3 5753 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5754 #undef SETERRQ 5755 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5756 5757 #undef __FUNCT__ 5758 #define __FUNCT__ "matsetvaluesmpiaij_" 5759 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5760 { 5761 Mat mat = *mmat; 5762 PetscInt m = *mm, n = *mn; 5763 InsertMode addv = *maddv; 5764 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5765 PetscScalar value; 5766 PetscErrorCode ierr; 5767 5768 MatCheckPreallocated(mat,1); 5769 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5770 5771 #if defined(PETSC_USE_DEBUG) 5772 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5773 #endif 5774 { 5775 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5776 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5777 PetscBool roworiented = aij->roworiented; 5778 5779 /* Some Variables required in the macro */ 5780 Mat A = aij->A; 5781 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5782 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5783 MatScalar *aa = a->a; 5784 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5785 Mat B = aij->B; 5786 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5787 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5788 MatScalar *ba = b->a; 5789 5790 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5791 PetscInt nonew = a->nonew; 5792 MatScalar *ap1,*ap2; 5793 5794 PetscFunctionBegin; 5795 for (i=0; i<m; i++) { 5796 if (im[i] < 0) continue; 5797 #if defined(PETSC_USE_DEBUG) 5798 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5799 #endif 5800 if (im[i] >= rstart && im[i] < rend) { 5801 row = im[i] - rstart; 5802 lastcol1 = -1; 5803 rp1 = aj + ai[row]; 5804 ap1 = aa + ai[row]; 5805 rmax1 = aimax[row]; 5806 nrow1 = ailen[row]; 5807 low1 = 0; 5808 high1 = nrow1; 5809 lastcol2 = -1; 5810 rp2 = bj + bi[row]; 5811 ap2 = ba + bi[row]; 5812 rmax2 = bimax[row]; 5813 nrow2 = bilen[row]; 5814 low2 = 0; 5815 high2 = nrow2; 5816 5817 for (j=0; j<n; j++) { 5818 if (roworiented) value = v[i*n+j]; 5819 else value = v[i+j*m]; 5820 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5821 if (in[j] >= cstart && in[j] < cend) { 5822 col = in[j] - cstart; 5823 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5824 } else if (in[j] < 0) continue; 5825 #if defined(PETSC_USE_DEBUG) 5826 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5827 #endif 5828 else { 5829 if (mat->was_assembled) { 5830 if (!aij->colmap) { 5831 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5832 } 5833 #if defined(PETSC_USE_CTABLE) 5834 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5835 col--; 5836 #else 5837 col = aij->colmap[in[j]] - 1; 5838 #endif 5839 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5840 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5841 col = in[j]; 5842 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5843 B = aij->B; 5844 b = (Mat_SeqAIJ*)B->data; 5845 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5846 rp2 = bj + bi[row]; 5847 ap2 = ba + bi[row]; 5848 rmax2 = bimax[row]; 5849 nrow2 = bilen[row]; 5850 low2 = 0; 5851 high2 = nrow2; 5852 bm = aij->B->rmap->n; 5853 ba = b->a; 5854 } 5855 } else col = in[j]; 5856 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5857 } 5858 } 5859 } else if (!aij->donotstash) { 5860 if (roworiented) { 5861 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5862 } else { 5863 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5864 } 5865 } 5866 } 5867 } 5868 PetscFunctionReturnVoid(); 5869 } 5870 5871