#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
   the type also automatically switches over to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
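
/*
   Usage sketch (illustrative; not part of the original source). It shows the
   pattern recommended in the manpages above of calling both preallocation
   routines on a MATAIJ matrix. The size N and the nonzero counts (5 per
   diagonal-block row, 2 per off-diagonal-block row) are made-up placeholders.
*/
#if 0
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);
  /* only the call matching the communicator size takes effect; calling both is safe */
  ierr = MatSeqAIJSetPreallocation(*A,5,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,5,NULL,2,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif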

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Distributes a SeqAIJ matrix across a set of processes. Code stolen from
  MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

  Only for square matrices.

  Used by a preconditioner, hence PETSC_EXTERN. A usage sketch follows the routine.
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore = 0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
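
/*
   Usage sketch for MatDistribute_MPIAIJ() (illustrative; not part of the
   original source). gseq is a fully assembled SeqAIJ matrix that only rank 0
   needs to hold meaningfully; m is this process's desired number of local
   rows. Both names are placeholders.
*/
#if 0
static PetscErrorCode ExampleDistribute(MPI_Comm comm,Mat gseq,PetscInt m,Mat *dist)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* first call builds the parallel matrix, including preallocation */
  ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_INITIAL_MATRIX,dist);CHKERRQ(ierr);
  /* later calls with MAT_REUSE_MATRIX only move new numerical values from rank 0 */
  ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_REUSE_MATRIX,dist);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif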

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each
  processor has an order N integer array, but access is fast).
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
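
/*
   Lookup sketch (illustrative; not part of the original source): how the
   colmap built above is queried elsewhere in this file to translate a global
   column gcol into a local off-diagonal column lcol. Entries are stored
   shifted by one so that zero can mean "missing"; lcol comes back as -1 when
   gcol is not in the off-diagonal part.
*/
#if 0
static PetscErrorCode ExampleColmapLookup(Mat mat,PetscInt gcol,PetscInt *lcol)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;
#else
  *lcol = aij->colmap[gcol] - 1;
#endif
  PetscFunctionReturn(0);
}
#endif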

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
  if (col <= lastcol1) low1 = 0; \
  else high1 = nrow1; \
  lastcol1 = col; \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else low1 = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else ap1[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  A->nonzerostate++; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
{ \
  if (col <= lastcol2) low2 = 0; \
  else high2 = nrow2; \
  lastcol2 = col; \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else low2 = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else ap2[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  B->nonzerostate++; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A    = aij->A; /* diagonal part of the matrix */
  Mat        B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all nonzero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal part */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}
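
/*
   Worked example (illustrative; not part of the original source) of the
   diagonal/off-diagonal split performed above. With cstart = 4, cend = 8 and
   a CSR row holding global columns {1, 5, 6, 9}:
     columns 5 and 6 land in the diagonal part with local indices {1, 2}, so dnz = 2;
     columns 1 and 9 land in the off-diagonal part with their global indices, so onz = 2.
*/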

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
  Mat         A     = aij->A; /* diagonal part of the matrix */
  Mat         B     = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b    = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am     = aij->A->rmap->n,j;
  PetscInt    *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all nonzero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal part */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
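
/*
   Usage sketch (illustrative; not part of the original source): the stash
   machinery drained above is what makes the following standard pattern work,
   where a process sets an entry in a row it does not own. grow, gcol, and v
   are placeholders for a global row, a global column, and a value.
*/
#if 0
static PetscErrorCode ExampleOffProcessEntry(Mat mat,PetscInt grow,PetscInt gcol,PetscScalar v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValues(mat,1,&grow,1,&gcol,&v,ADD_VALUES);CHKERRQ(ierr); /* stashed if grow is off-process */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);        /* starts communicating the stash */
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);          /* receives and inserts stashed values */
  PetscFunctionReturn(0);
}
#endif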

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  PetscInt       *lrows;
  PetscInt       r,len;
  PetscBool      cong;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(mat->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if ((diag != 0.0) && cong) {
    ierr = MatZeroRows(mat->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n,&lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N,&rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf,n,N,NULL,PETSC_OWN_POINTER,rrows,PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf,MPIU_INT,(PetscInt*)rows,lrows,MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,(PetscInt*)rows,lrows,MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
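
/*
   Note on the PetscSF usage above (added commentary; not part of the original
   source): the star forest has one leaf per requested global row and one root
   per locally owned row. Reducing the (nonnegative) global row numbers with
   MPI_LOR into lrows, which was initialized to -1, flags exactly those owned
   rows that some process asked to zero; lrows is then compressed into a plain
   list of local row indices.
*/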

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
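
/*
   Commentary on the multiply kernels above (added; not part of the original
   source). Writing the local rows of the matrix as [A B], where A is the
   diagonal block and B the off-diagonal block acting on the ghost values in
   lvec, MatMult computes

       yy = A*xx_local + B*lvec

   and hides the forward scatter that fills lvec behind the purely local
   product A*xx by placing that product between VecScatterBegin() and
   VecScatterEnd(). MatMultTranspose runs the pattern in reverse: it forms
   B^T*xx into lvec and A^T*xx into yy locally, then a reverse scatter with
   ADD_VALUES sums the lvec contributions back into the owning processes.
*/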

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range = 0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* each processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
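
/*
   Note on the pattern above (added commentary; not part of the original
   source): PetscViewerFlowControlStart/StepMaster/EndMaster on rank 0 and the
   corresponding StepWorker/EndWorker calls on all other ranks throttle how
   many processes may be sending data to rank 0 at once, so that rank 0 can
   write the row lengths, column indices, and values to the binary file in
   order without being flooded by messages.
*/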
MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1393 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1394 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1395 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1396 if (!inodes) { 1397 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1398 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1399 } else { 1400 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1401 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1402 } 1403 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1404 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1405 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1406 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1407 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1408 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1409 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1410 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1411 PetscFunctionReturn(0); 1412 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1413 PetscInt inodecount,inodelimit,*inodes; 1414 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1415 if (inodes) { 1416 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1417 } else { 1418 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1419 } 1420 PetscFunctionReturn(0); 1421 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1422 PetscFunctionReturn(0); 1423 } 1424 } else if (isbinary) { 1425 if (size == 1) { 1426 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1427 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1428 } else { 1429 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1430 } 1431 PetscFunctionReturn(0); 1432 } else if (iascii && size == 1) { 1433 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1434 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } else if (isdraw) { 1437 PetscDraw draw; 1438 PetscBool isnull; 1439 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1440 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1441 if (isnull) PetscFunctionReturn(0); 1442 } 1443 1444 { /* assemble the entire matrix onto first processor */ 1445 Mat A = NULL, Av; 1446 IS isrow,iscol; 1447 1448 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1449 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1450 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1451 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1452 /* The commented code uses MatCreateSubMatrices instead */ 1453 /* 1454 Mat *AA, A = NULL, Av; 1455 IS isrow,iscol; 1456 1457 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1458 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1459 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1460 if (!rank) { 1461 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1462 A = AA[0]; 1463 Av = AA[0]; 1464 } 1465 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1466 */ 1467 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1468 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1469 /* 1470 Everyone has to call to draw the matrix since the graphics waits are 1471 synchronized across all processors that share the PetscDraw object 1472 */ 1473 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1474 if (!rank) { 1475 if (((PetscObject)mat)->name) { 1476 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1477 } 1478 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1479 } 1480 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1481 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1482 ierr = MatDestroy(&A);CHKERRQ(ierr); 1483 } 1484 PetscFunctionReturn(0); 1485 } 1486 1487 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1488 { 1489 PetscErrorCode ierr; 1490 PetscBool iascii,isdraw,issocket,isbinary; 1491 1492 PetscFunctionBegin; 1493 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1494 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1495 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1496 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1497 if (iascii || isdraw || isbinary || issocket) { 1498 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1499 } 1500 PetscFunctionReturn(0); 1501 } 1502 1503 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1504 { 1505 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1506 PetscErrorCode ierr; 1507 Vec bb1 = 0; 1508 PetscBool hasop; 1509 1510 PetscFunctionBegin; 1511 if (flag == SOR_APPLY_UPPER) { 1512 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1513 PetscFunctionReturn(0); 1514 } 1515 1516 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1517 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1518 } 1519 1520 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1521 if (flag & SOR_ZERO_INITIAL_GUESS) { 1522 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1523 its--; 1524 } 1525 1526 while (its--) { 1527 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1528 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1529 1530 /* update rhs: bb1 = bb - B*x */ 1531 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1532 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1533 1534 
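/*
   Note on the SOR_LOCAL_* branches here and below (a summary, not from the original
   source): each outer iteration scatters the current iterate into mat->lvec (the
   ghost values), forms the locally corrected right-hand side

       bb1 = bb - B*x_ghost

   (done just above via the VecScale and multadd calls), and then runs the sequential
   SOR kernel on the diagonal block mat->A only. This is processwise block Jacobi
   with SOR inside each block, which is why a request for true parallel SOR is
   rejected at the end of this routine.
*/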
/* local sweep */ 1535 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1536 } 1537 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1538 if (flag & SOR_ZERO_INITIAL_GUESS) { 1539 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1540 its--; 1541 } 1542 while (its--) { 1543 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1544 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1545 1546 /* update rhs: bb1 = bb - B*x */ 1547 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1548 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1549 1550 /* local sweep */ 1551 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1552 } 1553 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1554 if (flag & SOR_ZERO_INITIAL_GUESS) { 1555 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1556 its--; 1557 } 1558 while (its--) { 1559 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1560 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1561 1562 /* update rhs: bb1 = bb - B*x */ 1563 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1564 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1565 1566 /* local sweep */ 1567 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1568 } 1569 } else if (flag & SOR_EISENSTAT) { 1570 Vec xx1; 1571 1572 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1573 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1574 1575 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1576 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1577 if (!mat->diag) { 1578 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1579 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1580 } 1581 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1582 if (hasop) { 1583 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1584 } else { 1585 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1586 } 1587 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1588 1589 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1590 1591 /* local sweep */ 1592 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1593 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1594 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1595 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1596 1597 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1598 1599 matin->factorerrortype = mat->A->factorerrortype; 1600 PetscFunctionReturn(0); 1601 } 1602 1603 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1604 { 1605 Mat aA,aB,Aperm; 1606 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1607 PetscScalar *aa,*ba; 1608 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1609 PetscSF rowsf,sf; 1610 IS parcolp = NULL; 1611 PetscBool done; 1612 PetscErrorCode ierr; 1613 1614 PetscFunctionBegin; 1615 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1616 ierr = 
ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1617 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1618 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1619 1620 /* Invert row permutation to find out where my rows should go */ 1621 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1622 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1623 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1624 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1625 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1626 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1627 1628 /* Invert column permutation to find out where my columns should go */ 1629 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1630 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1631 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1632 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1633 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1634 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1635 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1636 1637 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1638 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1639 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1640 1641 /* Find out where my gcols should go */ 1642 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1643 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1644 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1645 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1646 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1647 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1648 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1649 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1650 1651 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1652 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1653 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1654 for (i=0; i<m; i++) { 1655 PetscInt row = rdest[i],rowner; 1656 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1657 for (j=ai[i]; j<ai[i+1]; j++) { 1658 PetscInt cowner,col = cdest[aj[j]]; 1659 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1660 if (rowner == cowner) dnnz[i]++; 1661 else onnz[i]++; 1662 } 1663 for (j=bi[i]; j<bi[i+1]; j++) { 1664 PetscInt cowner,col = gcdest[bj[j]]; 1665 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1666 if (rowner == cowner) dnnz[i]++; 1667 else onnz[i]++; 1668 } 1669 } 1670 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1671 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1672 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1673 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1674 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1675 1676 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1677 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1678 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1679 for (i=0; i<m; i++) { 1680 
PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1681 PetscInt j0,rowlen; 1682 rowlen = ai[i+1] - ai[i]; 1683 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1684 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1685 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1686 } 1687 rowlen = bi[i+1] - bi[i]; 1688 for (j0=j=0; j<rowlen; j0=j) { 1689 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1690 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1691 } 1692 } 1693 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1694 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1695 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1696 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1697 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1698 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1699 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1700 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1701 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1702 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1703 *B = Aperm; 1704 PetscFunctionReturn(0); 1705 } 1706 1707 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1708 { 1709 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1710 PetscErrorCode ierr; 1711 1712 PetscFunctionBegin; 1713 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1714 if (ghosts) *ghosts = aij->garray; 1715 PetscFunctionReturn(0); 1716 } 1717 1718 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1719 { 1720 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1721 Mat A = mat->A,B = mat->B; 1722 PetscErrorCode ierr; 1723 PetscReal isend[5],irecv[5]; 1724 1725 PetscFunctionBegin; 1726 info->block_size = 1.0; 1727 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1728 1729 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1730 isend[3] = info->memory; isend[4] = info->mallocs; 1731 1732 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1733 1734 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1735 isend[3] += info->memory; isend[4] += info->mallocs; 1736 if (flag == MAT_LOCAL) { 1737 info->nz_used = isend[0]; 1738 info->nz_allocated = isend[1]; 1739 info->nz_unneeded = isend[2]; 1740 info->memory = isend[3]; 1741 info->mallocs = isend[4]; 1742 } else if (flag == MAT_GLOBAL_MAX) { 1743 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1744 1745 info->nz_used = irecv[0]; 1746 info->nz_allocated = irecv[1]; 1747 info->nz_unneeded = irecv[2]; 1748 info->memory = irecv[3]; 1749 info->mallocs = irecv[4]; 1750 } else if (flag == MAT_GLOBAL_SUM) { 1751 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1752 1753 info->nz_used = irecv[0]; 1754 info->nz_allocated = irecv[1]; 1755 info->nz_unneeded = irecv[2]; 1756 info->memory = irecv[3]; 1757 info->mallocs = irecv[4]; 1758 } 1759 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1760 info->fill_ratio_needed = 0; 1761 info->factor_mallocs = 0; 1762 PetscFunctionReturn(0); 1763 } 1764 1765 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1766 { 1767 
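/*
   Most options below are simply forwarded to both sequential blocks a->A and a->B;
   only a few also touch MPIAIJ-specific state (donotstash, roworiented). A minimal
   caller sketch:

       ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

   which sets a->donotstash so that entries destined for other processes are dropped
   by MatSetValues() rather than stashed and communicated at assembly time.
*/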
Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1768 PetscErrorCode ierr; 1769 1770 PetscFunctionBegin; 1771 switch (op) { 1772 case MAT_NEW_NONZERO_LOCATIONS: 1773 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1774 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1775 case MAT_KEEP_NONZERO_PATTERN: 1776 case MAT_NEW_NONZERO_LOCATION_ERR: 1777 case MAT_USE_INODES: 1778 case MAT_IGNORE_ZERO_ENTRIES: 1779 MatCheckPreallocated(A,1); 1780 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1781 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1782 break; 1783 case MAT_ROW_ORIENTED: 1784 MatCheckPreallocated(A,1); 1785 a->roworiented = flg; 1786 1787 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1788 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1789 break; 1790 case MAT_NEW_DIAGONALS: 1791 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1792 break; 1793 case MAT_IGNORE_OFF_PROC_ENTRIES: 1794 a->donotstash = flg; 1795 break; 1796 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1797 case MAT_SPD: 1798 case MAT_SYMMETRIC: 1799 case MAT_STRUCTURALLY_SYMMETRIC: 1800 case MAT_HERMITIAN: 1801 case MAT_SYMMETRY_ETERNAL: 1802 break; 1803 case MAT_SUBMAT_SINGLEIS: 1804 A->submat_singleis = flg; 1805 break; 1806 case MAT_STRUCTURE_ONLY: 1807 /* The option is handled directly by MatSetOption() */ 1808 break; 1809 default: 1810 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1811 } 1812 PetscFunctionReturn(0); 1813 } 1814 1815 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1816 { 1817 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1818 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1819 PetscErrorCode ierr; 1820 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1821 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1822 PetscInt *cmap,*idx_p; 1823 1824 PetscFunctionBegin; 1825 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1826 mat->getrowactive = PETSC_TRUE; 1827 1828 if (!mat->rowvalues && (idx || v)) { 1829 /* 1830 allocate enough space to hold information from the longest row. 
1831 */ 1832 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1833 PetscInt max = 1,tmp; 1834 for (i=0; i<matin->rmap->n; i++) { 1835 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1836 if (max < tmp) max = tmp; 1837 } 1838 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1839 } 1840 1841 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1842 lrow = row - rstart; 1843 1844 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1845 if (!v) {pvA = 0; pvB = 0;} 1846 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1847 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1848 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1849 nztot = nzA + nzB; 1850 1851 cmap = mat->garray; 1852 if (v || idx) { 1853 if (nztot) { 1854 /* Sort by increasing column numbers, assuming A and B already sorted */ 1855 PetscInt imark = -1; 1856 if (v) { 1857 *v = v_p = mat->rowvalues; 1858 for (i=0; i<nzB; i++) { 1859 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1860 else break; 1861 } 1862 imark = i; 1863 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1864 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1865 } 1866 if (idx) { 1867 *idx = idx_p = mat->rowindices; 1868 if (imark > -1) { 1869 for (i=0; i<imark; i++) { 1870 idx_p[i] = cmap[cworkB[i]]; 1871 } 1872 } else { 1873 for (i=0; i<nzB; i++) { 1874 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1875 else break; 1876 } 1877 imark = i; 1878 } 1879 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1880 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1881 } 1882 } else { 1883 if (idx) *idx = 0; 1884 if (v) *v = 0; 1885 } 1886 } 1887 *nz = nztot; 1888 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1889 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1890 PetscFunctionReturn(0); 1891 } 1892 1893 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1894 { 1895 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1896 1897 PetscFunctionBegin; 1898 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1899 aij->getrowactive = PETSC_FALSE; 1900 PetscFunctionReturn(0); 1901 } 1902 1903 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1904 { 1905 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1906 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1907 PetscErrorCode ierr; 1908 PetscInt i,j,cstart = mat->cmap->rstart; 1909 PetscReal sum = 0.0; 1910 MatScalar *v; 1911 1912 PetscFunctionBegin; 1913 if (aij->size == 1) { 1914 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1915 } else { 1916 if (type == NORM_FROBENIUS) { 1917 v = amat->a; 1918 for (i=0; i<amat->nz; i++) { 1919 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1920 } 1921 v = bmat->a; 1922 for (i=0; i<bmat->nz; i++) { 1923 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1924 } 1925 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1926 *norm = PetscSqrtReal(*norm); 1927 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1928 } else if (type == NORM_1) { /* max column norm */ 1929 PetscReal *tmp,*tmp2; 1930 PetscInt *jj,*garray = aij->garray; 1931 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1932 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1933 *norm = 0.0; 
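/*
   The 1-norm computed here is the maximum absolute column sum,

       ||A||_1 = max_j sum_i |a_ij|,

   accumulated below into a dense array of global length N (local block sums followed
   by an allreduce). This costs O(N) memory per process, so it is simple but not
   memory-scalable for very wide matrices.
*/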
1934 v = amat->a; jj = amat->j; 1935 for (j=0; j<amat->nz; j++) { 1936 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1937 } 1938 v = bmat->a; jj = bmat->j; 1939 for (j=0; j<bmat->nz; j++) { 1940 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1941 } 1942 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1943 for (j=0; j<mat->cmap->N; j++) { 1944 if (tmp2[j] > *norm) *norm = tmp2[j]; 1945 } 1946 ierr = PetscFree(tmp);CHKERRQ(ierr); 1947 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1948 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1949 } else if (type == NORM_INFINITY) { /* max row norm */ 1950 PetscReal ntemp = 0.0; 1951 for (j=0; j<aij->A->rmap->n; j++) { 1952 v = amat->a + amat->i[j]; 1953 sum = 0.0; 1954 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1955 sum += PetscAbsScalar(*v); v++; 1956 } 1957 v = bmat->a + bmat->i[j]; 1958 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1959 sum += PetscAbsScalar(*v); v++; 1960 } 1961 if (sum > ntemp) ntemp = sum; 1962 } 1963 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1964 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1965 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1966 } 1967 PetscFunctionReturn(0); 1968 } 1969 1970 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1971 { 1972 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1973 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1974 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol; 1975 PetscErrorCode ierr; 1976 Mat B,A_diag,*B_diag; 1977 MatScalar *array; 1978 1979 PetscFunctionBegin; 1980 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1981 ai = Aloc->i; aj = Aloc->j; 1982 bi = Bloc->i; bj = Bloc->j; 1983 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1984 PetscInt *d_nnz,*g_nnz,*o_nnz; 1985 PetscSFNode *oloc; 1986 PETSC_UNUSED PetscSF sf; 1987 1988 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1989 /* compute d_nnz for preallocation */ 1990 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1991 for (i=0; i<ai[ma]; i++) { 1992 d_nnz[aj[i]]++; 1993 } 1994 /* compute local off-diagonal contributions */ 1995 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1996 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1997 /* map those to global */ 1998 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1999 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2000 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2001 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2002 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2003 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2004 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2005 2006 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2007 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2008 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2009 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2010 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2011 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2012 } else { 2013 B = *matout; 2014 
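/*
   For the reuse path, B must already carry the transposed nonzero pattern; enabling
   MAT_NEW_NONZERO_ALLOCATION_ERR below turns a pattern mismatch into an error
   instead of a silent (and slow) reallocation. A typical calling sequence, sketched
   with a placeholder matrix At:

       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
       (change numerical values of A, keeping the same pattern)
       ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);
*/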
ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2015 } 2016 2017 b = (Mat_MPIAIJ*)B->data; 2018 A_diag = a->A; 2019 B_diag = &b->A; 2020 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2021 A_diag_ncol = A_diag->cmap->N; 2022 B_diag_ilen = sub_B_diag->ilen; 2023 B_diag_i = sub_B_diag->i; 2024 2025 /* Set ilen for diagonal of B */ 2026 for (i=0; i<A_diag_ncol; i++) { 2027 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2028 } 2029 2030 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2031 very quickly (=without using MatSetValues), because all writes are local. */ 2032 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2033 2034 /* copy over the B part */ 2035 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2036 array = Bloc->a; 2037 row = A->rmap->rstart; 2038 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2039 cols_tmp = cols; 2040 for (i=0; i<mb; i++) { 2041 ncol = bi[i+1]-bi[i]; 2042 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2043 row++; 2044 array += ncol; cols_tmp += ncol; 2045 } 2046 ierr = PetscFree(cols);CHKERRQ(ierr); 2047 2048 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2049 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2050 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2051 *matout = B; 2052 } else { 2053 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2054 } 2055 PetscFunctionReturn(0); 2056 } 2057 2058 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2059 { 2060 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2061 Mat a = aij->A,b = aij->B; 2062 PetscErrorCode ierr; 2063 PetscInt s1,s2,s3; 2064 2065 PetscFunctionBegin; 2066 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2067 if (rr) { 2068 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2069 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2070 /* Overlap communication with computation. 
*/ 2071 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2072 } 2073 if (ll) { 2074 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2075 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2076 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2077 } 2078 /* scale the diagonal block */ 2079 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2080 2081 if (rr) { 2082 /* Do a scatter end and then right scale the off-diagonal block */ 2083 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2084 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2085 } 2086 PetscFunctionReturn(0); 2087 } 2088 2089 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2090 { 2091 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2092 PetscErrorCode ierr; 2093 2094 PetscFunctionBegin; 2095 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2096 PetscFunctionReturn(0); 2097 } 2098 2099 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2100 { 2101 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2102 Mat a,b,c,d; 2103 PetscBool flg; 2104 PetscErrorCode ierr; 2105 2106 PetscFunctionBegin; 2107 a = matA->A; b = matA->B; 2108 c = matB->A; d = matB->B; 2109 2110 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2111 if (flg) { 2112 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2113 } 2114 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2115 PetscFunctionReturn(0); 2116 } 2117 2118 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2119 { 2120 PetscErrorCode ierr; 2121 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2122 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2123 2124 PetscFunctionBegin; 2125 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2126 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2127 /* because of the column compression in the off-processor part of the matrix a->B, 2128 the number of columns in a->B and b->B may be different, hence we cannot call 2129 the MatCopy() directly on the two parts. If need be, we can provide a more 2130 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2131 then copying the submatrices */ 2132 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2133 } else { 2134 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2135 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2136 } 2137 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2138 PetscFunctionReturn(0); 2139 } 2140 2141 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2142 { 2143 PetscErrorCode ierr; 2144 2145 PetscFunctionBegin; 2146 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2147 PetscFunctionReturn(0); 2148 } 2149 2150 /* 2151 Computes the number of nonzeros per row needed for preallocation when X and Y 2152 have different nonzero structure. 
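   Worked example: if row i of X has mapped columns {0,3,7} and row i of Y has
   mapped columns {3,5}, the merge below counts the union {0,3,5,7} and sets
   nnz[i] = 4; the inner k loop catches Y up to X and skips the duplicate 3.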
2153 */ 2154 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2155 { 2156 PetscInt i,j,k,nzx,nzy; 2157 2158 PetscFunctionBegin; 2159 /* Set the number of nonzeros in the new matrix */ 2160 for (i=0; i<m; i++) { 2161 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2162 nzx = xi[i+1] - xi[i]; 2163 nzy = yi[i+1] - yi[i]; 2164 nnz[i] = 0; 2165 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2166 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2167 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2168 nnz[i]++; 2169 } 2170 for (; k<nzy; k++) nnz[i]++; 2171 } 2172 PetscFunctionReturn(0); 2173 } 2174 2175 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2176 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2177 { 2178 PetscErrorCode ierr; 2179 PetscInt m = Y->rmap->N; 2180 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2181 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2182 2183 PetscFunctionBegin; 2184 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2185 PetscFunctionReturn(0); 2186 } 2187 2188 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2189 { 2190 PetscErrorCode ierr; 2191 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2192 PetscBLASInt bnz,one=1; 2193 Mat_SeqAIJ *x,*y; 2194 2195 PetscFunctionBegin; 2196 if (str == SAME_NONZERO_PATTERN) { 2197 PetscScalar alpha = a; 2198 x = (Mat_SeqAIJ*)xx->A->data; 2199 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2200 y = (Mat_SeqAIJ*)yy->A->data; 2201 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2202 x = (Mat_SeqAIJ*)xx->B->data; 2203 y = (Mat_SeqAIJ*)yy->B->data; 2204 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2205 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2206 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2207 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2208 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2209 } else { 2210 Mat B; 2211 PetscInt *nnz_d,*nnz_o; 2212 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2213 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2214 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2215 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2216 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2217 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2218 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2219 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2220 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2221 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2222 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2223 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2224 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2225 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2226 } 2227 PetscFunctionReturn(0); 2228 } 2229 2230 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2231 2232 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2233 { 2234 #if defined(PETSC_USE_COMPLEX) 2235 PetscErrorCode ierr; 2236 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2237 2238 PetscFunctionBegin; 2239 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2240 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2241 #else 2242 PetscFunctionBegin; 2243 #endif 2244 PetscFunctionReturn(0); 2245 } 2246 2247 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2248 { 2249 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2250 PetscErrorCode ierr; 2251 2252 PetscFunctionBegin; 2253 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2254 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2255 PetscFunctionReturn(0); 2256 } 2257 2258 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2259 { 2260 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2265 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2266 PetscFunctionReturn(0); 2267 } 2268 2269 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2270 { 2271 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2272 PetscErrorCode ierr; 2273 PetscInt i,*idxb = 0; 2274 PetscScalar *va,*vb; 2275 Vec vtmp; 2276 2277 PetscFunctionBegin; 2278 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2279 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2280 if (idx) { 2281 for (i=0; i<A->rmap->n; i++) { 2282 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2283 } 2284 } 2285 2286 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2287 if (idx) { 2288 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2289 } 2290 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2291 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2292 2293 for (i=0; i<A->rmap->n; i++) { 2294 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2295 va[i] = vb[i]; 2296 if (idx) idx[i] = a->garray[idxb[i]]; 2297 } 2298 } 2299 2300 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2301 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2302 ierr = PetscFree(idxb);CHKERRQ(ierr); 2303 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2304 PetscFunctionReturn(0); 2305 } 2306 2307 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2308 { 2309 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2310 PetscErrorCode ierr; 2311 PetscInt i,*idxb = 0; 2312 PetscScalar *va,*vb; 2313 Vec vtmp; 2314 2315 PetscFunctionBegin; 2316 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2317 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2318 if (idx) { 2319 for (i=0; i<A->rmap->n; i++) { 2320 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2321 } 2322 } 2323 2324 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2325 if (idx) { 2326 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2327 } 2328 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2329 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2330 2331 for (i=0; i<A->rmap->n; i++) { 2332 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2333 va[i] = vb[i]; 2334 if (idx) idx[i] = a->garray[idxb[i]]; 2335 } 2336 } 2337 2338 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2339 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2340 ierr = PetscFree(idxb);CHKERRQ(ierr); 2341 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2342 PetscFunctionReturn(0); 2343 } 2344 2345 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2346 { 2347 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2348 PetscInt n = A->rmap->n; 2349 PetscInt cstart = A->cmap->rstart; 2350 PetscInt *cmap = mat->garray; 2351 PetscInt *diagIdx, *offdiagIdx; 2352 Vec diagV, offdiagV; 2353 PetscScalar *a, *diagA, *offdiagA; 2354 PetscInt r; 2355 PetscErrorCode ierr; 2356 2357 PetscFunctionBegin; 2358
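/*
   Same pattern as MatGetRowMax_MPIAIJ below: take row minima of the diagonal and
   off-diagonal blocks separately, then merge the two candidates per row. Caveat:
   the merge compares PetscAbsScalar() magnitudes, so for real matrices with
   negative entries it selects the candidate of smaller magnitude rather than the
   algebraically smaller value.
*/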
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2359 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2360 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2361 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2362 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2363 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2364 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2365 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2366 for (r = 0; r < n; ++r) { 2367 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2368 a[r] = diagA[r]; 2369 idx[r] = cstart + diagIdx[r]; 2370 } else { 2371 a[r] = offdiagA[r]; 2372 idx[r] = cmap[offdiagIdx[r]]; 2373 } 2374 } 2375 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2376 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2377 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2378 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2379 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2380 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2381 PetscFunctionReturn(0); 2382 } 2383 2384 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2385 { 2386 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2387 PetscInt n = A->rmap->n; 2388 PetscInt cstart = A->cmap->rstart; 2389 PetscInt *cmap = mat->garray; 2390 PetscInt *diagIdx, *offdiagIdx; 2391 Vec diagV, offdiagV; 2392 PetscScalar *a, *diagA, *offdiagA; 2393 PetscInt r; 2394 PetscErrorCode ierr; 2395 2396 PetscFunctionBegin; 2397 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2398 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2399 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2400 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2401 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2402 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2403 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2404 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2405 for (r = 0; r < n; ++r) { 2406 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2407 a[r] = diagA[r]; 2408 idx[r] = cstart + diagIdx[r]; 2409 } else { 2410 a[r] = offdiagA[r]; 2411 idx[r] = cmap[offdiagIdx[r]]; 2412 } 2413 } 2414 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2415 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2416 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2417 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2418 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2419 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2420 PetscFunctionReturn(0); 2421 } 2422 2423 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2424 { 2425 PetscErrorCode ierr; 2426 Mat *dummy; 2427 2428 PetscFunctionBegin; 2429 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2430 *newmat = *dummy; 2431 ierr = PetscFree(dummy);CHKERRQ(ierr); 2432 PetscFunctionReturn(0); 2433 } 2434 2435 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2436 { 2437 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2438 PetscErrorCode ierr; 2439 2440 PetscFunctionBegin; 2441 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2442 A->factorerrortype = a->A->factorerrortype; 2443 PetscFunctionReturn(0); 2444 } 2445 2446 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2447 { 2448 PetscErrorCode ierr; 2449 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2450 2451 PetscFunctionBegin; 2452
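/*
   Both sequential blocks are filled from the same random context and the matrix is
   then re-assembled; values land in the existing nonzero pattern of the two blocks.
   A minimal driver sketch, assuming x has already been preallocated:

       PetscRandom rctx;
       ierr = PetscRandomCreate(PetscObjectComm((PetscObject)x),&rctx);CHKERRQ(ierr);
       ierr = MatSetRandom(x,rctx);CHKERRQ(ierr);
       ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
*/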
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2453 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2454 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2455 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2456 PetscFunctionReturn(0); 2457 } 2458 2459 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2460 { 2461 PetscFunctionBegin; 2462 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2463 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2464 PetscFunctionReturn(0); 2465 } 2466 2467 /*@ 2468 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2469 2470 Collective on Mat 2471 2472 Input Parameters: 2473 + A - the matrix 2474 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2475 2476 Level: advanced 2477 2478 @*/ 2479 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2480 { 2481 PetscErrorCode ierr; 2482 2483 PetscFunctionBegin; 2484 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2485 PetscFunctionReturn(0); 2486 } 2487 2488 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2489 { 2490 PetscErrorCode ierr; 2491 PetscBool sc = PETSC_FALSE,flg; 2492 2493 PetscFunctionBegin; 2494 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2495 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2496 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2497 if (flg) { 2498 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2499 } 2500 ierr = PetscOptionsTail();CHKERRQ(ierr); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2505 { 2506 PetscErrorCode ierr; 2507 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2508 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2509 2510 PetscFunctionBegin; 2511 if (!Y->preallocated) { 2512 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2513 } else if (!aij->nz) { 2514 PetscInt nonew = aij->nonew; 2515 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2516 aij->nonew = nonew; 2517 } 2518 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2519 PetscFunctionReturn(0); 2520 } 2521 2522 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2523 { 2524 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2525 PetscErrorCode ierr; 2526 2527 PetscFunctionBegin; 2528 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2529 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2530 if (d) { 2531 PetscInt rstart; 2532 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2533 *d += rstart; 2534 2535 } 2536 PetscFunctionReturn(0); 2537 } 2538 2539 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2540 { 2541 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2542 PetscErrorCode ierr; 2543 2544 PetscFunctionBegin; 2545 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 /* -------------------------------------------------------------------*/ 2550 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2551 MatGetRow_MPIAIJ, 2552 
MatRestoreRow_MPIAIJ, 2553 MatMult_MPIAIJ, 2554 /* 4*/ MatMultAdd_MPIAIJ, 2555 MatMultTranspose_MPIAIJ, 2556 MatMultTransposeAdd_MPIAIJ, 2557 0, 2558 0, 2559 0, 2560 /*10*/ 0, 2561 0, 2562 0, 2563 MatSOR_MPIAIJ, 2564 MatTranspose_MPIAIJ, 2565 /*15*/ MatGetInfo_MPIAIJ, 2566 MatEqual_MPIAIJ, 2567 MatGetDiagonal_MPIAIJ, 2568 MatDiagonalScale_MPIAIJ, 2569 MatNorm_MPIAIJ, 2570 /*20*/ MatAssemblyBegin_MPIAIJ, 2571 MatAssemblyEnd_MPIAIJ, 2572 MatSetOption_MPIAIJ, 2573 MatZeroEntries_MPIAIJ, 2574 /*24*/ MatZeroRows_MPIAIJ, 2575 0, 2576 0, 2577 0, 2578 0, 2579 /*29*/ MatSetUp_MPIAIJ, 2580 0, 2581 0, 2582 MatGetDiagonalBlock_MPIAIJ, 2583 0, 2584 /*34*/ MatDuplicate_MPIAIJ, 2585 0, 2586 0, 2587 0, 2588 0, 2589 /*39*/ MatAXPY_MPIAIJ, 2590 MatCreateSubMatrices_MPIAIJ, 2591 MatIncreaseOverlap_MPIAIJ, 2592 MatGetValues_MPIAIJ, 2593 MatCopy_MPIAIJ, 2594 /*44*/ MatGetRowMax_MPIAIJ, 2595 MatScale_MPIAIJ, 2596 MatShift_MPIAIJ, 2597 MatDiagonalSet_MPIAIJ, 2598 MatZeroRowsColumns_MPIAIJ, 2599 /*49*/ MatSetRandom_MPIAIJ, 2600 0, 2601 0, 2602 0, 2603 0, 2604 /*54*/ MatFDColoringCreate_MPIXAIJ, 2605 0, 2606 MatSetUnfactored_MPIAIJ, 2607 MatPermute_MPIAIJ, 2608 0, 2609 /*59*/ MatCreateSubMatrix_MPIAIJ, 2610 MatDestroy_MPIAIJ, 2611 MatView_MPIAIJ, 2612 0, 2613 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2614 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2615 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2616 0, 2617 0, 2618 0, 2619 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2620 MatGetRowMinAbs_MPIAIJ, 2621 0, 2622 0, 2623 0, 2624 0, 2625 /*75*/ MatFDColoringApply_AIJ, 2626 MatSetFromOptions_MPIAIJ, 2627 0, 2628 0, 2629 MatFindZeroDiagonals_MPIAIJ, 2630 /*80*/ 0, 2631 0, 2632 0, 2633 /*83*/ MatLoad_MPIAIJ, 2634 MatIsSymmetric_MPIAIJ, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2640 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2641 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2642 MatPtAP_MPIAIJ_MPIAIJ, 2643 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2644 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2645 0, 2646 0, 2647 0, 2648 0, 2649 /*99*/ 0, 2650 0, 2651 0, 2652 MatConjugate_MPIAIJ, 2653 0, 2654 /*104*/MatSetValuesRow_MPIAIJ, 2655 MatRealPart_MPIAIJ, 2656 MatImaginaryPart_MPIAIJ, 2657 0, 2658 0, 2659 /*109*/0, 2660 0, 2661 MatGetRowMin_MPIAIJ, 2662 0, 2663 MatMissingDiagonal_MPIAIJ, 2664 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2665 0, 2666 MatGetGhosts_MPIAIJ, 2667 0, 2668 0, 2669 /*119*/0, 2670 0, 2671 0, 2672 0, 2673 MatGetMultiProcBlock_MPIAIJ, 2674 /*124*/MatFindNonzeroRows_MPIAIJ, 2675 MatGetColumnNorms_MPIAIJ, 2676 MatInvertBlockDiagonal_MPIAIJ, 2677 MatInvertVariableBlockDiagonal_MPIAIJ, 2678 MatCreateSubMatricesMPI_MPIAIJ, 2679 /*129*/0, 2680 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2681 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2682 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2683 0, 2684 /*134*/0, 2685 0, 2686 MatRARt_MPIAIJ_MPIAIJ, 2687 0, 2688 0, 2689 /*139*/MatSetBlockSizes_MPIAIJ, 2690 0, 2691 0, 2692 MatFDColoringSetUp_MPIXAIJ, 2693 MatFindOffBlockDiagonalEntries_MPIAIJ, 2694 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2695 }; 2696 2697 /* ----------------------------------------------------------------------------------------*/ 2698 2699 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2700 { 2701 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2702 PetscErrorCode ierr; 2703 2704 PetscFunctionBegin; 2705 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2706 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2707 PetscFunctionReturn(0); 2708 } 2709 2710 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2711 { 2712 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2713 PetscErrorCode ierr; 2714 2715 PetscFunctionBegin; 2716 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2717 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2718 PetscFunctionReturn(0); 2719 } 2720 2721 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2722 { 2723 Mat_MPIAIJ *b; 2724 PetscErrorCode ierr; 2725 2726 PetscFunctionBegin; 2727 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2728 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2729 b = (Mat_MPIAIJ*)B->data; 2730 2731 #if defined(PETSC_USE_CTABLE) 2732 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2733 #else 2734 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2735 #endif 2736 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2737 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2738 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2739 2740 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2741 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2742 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2743 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2744 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2745 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2746 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2747 2748 if (!B->preallocated) { 2749 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2750 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2751 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2752 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2753 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2754 } 2755 2756 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2757 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2758 B->preallocated = PETSC_TRUE; 2759 B->was_assembled = PETSC_FALSE; 2760 B->assembled = PETSC_FALSE; 2761 PetscFunctionReturn(0); 2762 } 2763 2764 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2765 { 2766 Mat_MPIAIJ *b; 2767 PetscErrorCode ierr; 2768 2769 PetscFunctionBegin; 2770 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2771 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2772 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2773 b = (Mat_MPIAIJ*)B->data; 2774 2775 #if defined(PETSC_USE_CTABLE) 2776 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2777 #else 2778 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2779 #endif 2780 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2781 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2782 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2783 2784 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2785 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2786 B->preallocated = PETSC_TRUE; 2787 B->was_assembled = PETSC_FALSE; 2788 B->assembled = PETSC_FALSE; 2789 PetscFunctionReturn(0); 2790 } 2791 2792 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2793 { 2794 Mat mat; 2795 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2796 PetscErrorCode ierr; 2797 2798 PetscFunctionBegin; 2799 *newmat = 0; 2800 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2801 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2802 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2803 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2804 
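/*
   The duplication below mixes deep and shallow copies: the row/column layouts are
   shared by reference counting, colmap/garray are copied, and the two sequential
   blocks plus the scatter context are fully duplicated, so the new matrix can be
   used and modified independently. Usage sketch (B and C are placeholder names):

       ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);         (pattern and values)
       ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&C);CHKERRQ(ierr);  (pattern only)
*/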
a = (Mat_MPIAIJ*)mat->data; 2805 2806 mat->factortype = matin->factortype; 2807 mat->assembled = PETSC_TRUE; 2808 mat->insertmode = NOT_SET_VALUES; 2809 mat->preallocated = PETSC_TRUE; 2810 2811 a->size = oldmat->size; 2812 a->rank = oldmat->rank; 2813 a->donotstash = oldmat->donotstash; 2814 a->roworiented = oldmat->roworiented; 2815 a->rowindices = 0; 2816 a->rowvalues = 0; 2817 a->getrowactive = PETSC_FALSE; 2818 2819 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2820 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2821 2822 if (oldmat->colmap) { 2823 #if defined(PETSC_USE_CTABLE) 2824 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2825 #else 2826 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2827 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2828 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2829 #endif 2830 } else a->colmap = 0; 2831 if (oldmat->garray) { 2832 PetscInt len; 2833 len = oldmat->B->cmap->n; 2834 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2835 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2836 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2837 } else a->garray = 0; 2838 2839 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2840 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2841 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2842 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2843 2844 if (oldmat->Mvctx_mpi1) { 2845 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2846 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2847 } 2848 2849 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2850 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2851 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2852 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2853 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2854 *newmat = mat; 2855 PetscFunctionReturn(0); 2856 } 2857 2858 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2859 { 2860 PetscBool isbinary, ishdf5; 2861 PetscErrorCode ierr; 2862 2863 PetscFunctionBegin; 2864 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2865 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2866 /* force binary viewer to load .info file if it has not yet done so */ 2867 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2868 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2869 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2870 if (isbinary) { 2871 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2872 } else if (ishdf5) { 2873 #if defined(PETSC_HAVE_HDF5) 2874 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2875 #else 2876 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2877 #endif 2878 } else { 2879 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2880 } 2881 
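/*
   A typical caller sequence for the binary branch above (a sketch; the file name
   is hypothetical):

       ierr = MatCreate(comm,&A);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
       ierr = MatLoad(A,viewer);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/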
PetscFunctionReturn(0); 2882 } 2883 2884 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2885 { 2886 PetscScalar *vals,*svals; 2887 MPI_Comm comm; 2888 PetscErrorCode ierr; 2889 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2890 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2891 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2892 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2893 PetscInt cend,cstart,n,*rowners; 2894 int fd; 2895 PetscInt bs = newMat->rmap->bs; 2896 2897 PetscFunctionBegin; 2898 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2899 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2900 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2901 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2902 if (!rank) { 2903 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2904 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object"); 2905 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ"); 2906 } 2907 2908 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2909 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2910 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2911 if (bs < 0) bs = 1; 2912 2913 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2914 M = header[1]; N = header[2]; 2915 2916 /* If global sizes are set, check if they are consistent with that given in the file */ 2917 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2918 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2919 2920 /* determine ownership of all (block) rows */ 2921 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs); 2922 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2923 else m = newMat->rmap->n; /* Set by user */ 2924 2925 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2926 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2927 2928 /* First process needs enough room for process with most rows */ 2929 if (!rank) { 2930 mmax = rowners[1]; 2931 for (i=2; i<=size; i++) { 2932 mmax = PetscMax(mmax, rowners[i]); 2933 } 2934 } else mmax = -1; /* unused, but compilers complain */ 2935 2936 rowners[0] = 0; 2937 for (i=2; i<=size; i++) { 2938 rowners[i] += rowners[i-1]; 2939 } 2940 rstart = rowners[rank]; 2941 rend = rowners[rank+1]; 2942 2943 /* distribute row lengths to all processors */ 2944 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2945 if (!rank) { 2946 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2947 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2948 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2949 for (j=0; j<m; j++) { 2950 procsnz[0] += ourlens[j]; 2951 } 2952 for (i=1; i<size; i++) { 2953 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2954 /* calculate the number of nonzeros on each processor */ 2955 for
(j=0; j<rowners[i+1]-rowners[i]; j++) { 2956 procsnz[i] += rowlengths[j]; 2957 } 2958 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2959 } 2960 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2961 } else { 2962 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2963 } 2964 2965 if (!rank) { 2966 /* determine max buffer needed and allocate it */ 2967 maxnz = 0; 2968 for (i=0; i<size; i++) { 2969 maxnz = PetscMax(maxnz,procsnz[i]); 2970 } 2971 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2972 2973 /* read in my part of the matrix column indices */ 2974 nz = procsnz[0]; 2975 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2976 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2977 2978 /* read in every one elses and ship off */ 2979 for (i=1; i<size; i++) { 2980 nz = procsnz[i]; 2981 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2982 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2983 } 2984 ierr = PetscFree(cols);CHKERRQ(ierr); 2985 } else { 2986 /* determine buffer space needed for message */ 2987 nz = 0; 2988 for (i=0; i<m; i++) { 2989 nz += ourlens[i]; 2990 } 2991 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2992 2993 /* receive message of column indices*/ 2994 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2995 } 2996 2997 /* determine column ownership if matrix is not square */ 2998 if (N != M) { 2999 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3000 else n = newMat->cmap->n; 3001 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3002 cstart = cend - n; 3003 } else { 3004 cstart = rstart; 3005 cend = rend; 3006 n = cend - cstart; 3007 } 3008 3009 /* loop over local rows, determining number of off diagonal entries */ 3010 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3011 jj = 0; 3012 for (i=0; i<m; i++) { 3013 for (j=0; j<ourlens[i]; j++) { 3014 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3015 jj++; 3016 } 3017 } 3018 3019 for (i=0; i<m; i++) { 3020 ourlens[i] -= offlens[i]; 3021 } 3022 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3023 3024 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3025 3026 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3027 3028 for (i=0; i<m; i++) { 3029 ourlens[i] += offlens[i]; 3030 } 3031 3032 if (!rank) { 3033 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3034 3035 /* read in my part of the matrix numerical values */ 3036 nz = procsnz[0]; 3037 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3038 3039 /* insert into matrix */ 3040 jj = rstart; 3041 smycols = mycols; 3042 svals = vals; 3043 for (i=0; i<m; i++) { 3044 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3045 smycols += ourlens[i]; 3046 svals += ourlens[i]; 3047 jj++; 3048 } 3049 3050 /* read in other processors and ship out */ 3051 for (i=1; i<size; i++) { 3052 nz = procsnz[i]; 3053 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3054 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3055 } 3056 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3057 } else { 3058 /* receive numeric values */ 3059 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3060 3061 /* receive message of values*/ 3062 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3063 3064 /* insert into matrix */ 3065 jj = rstart; 3066 smycols = mycols; 
3067 svals = vals; 3068 for (i=0; i<m; i++) { 3069 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3070 smycols += ourlens[i]; 3071 svals += ourlens[i]; 3072 jj++; 3073 } 3074 } 3075 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3076 ierr = PetscFree(vals);CHKERRQ(ierr); 3077 ierr = PetscFree(mycols);CHKERRQ(ierr); 3078 ierr = PetscFree(rowners);CHKERRQ(ierr); 3079 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3080 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3081 PetscFunctionReturn(0); 3082 } 3083 3084 /* Not scalable because of ISAllGather() unless getting all columns. */ 3085 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3086 { 3087 PetscErrorCode ierr; 3088 IS iscol_local; 3089 PetscBool isstride; 3090 PetscMPIInt lisstride=0,gisstride; 3091 3092 PetscFunctionBegin; 3093 /* check if we are grabbing all columns*/ 3094 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3095 3096 if (isstride) { 3097 PetscInt start,len,mstart,mlen; 3098 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3099 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3100 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3101 if (mstart == start && mlen-mstart == len) lisstride = 1; 3102 } 3103 3104 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3105 if (gisstride) { 3106 PetscInt N; 3107 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3108 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3109 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3110 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3111 } else { 3112 PetscInt cbs; 3113 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3114 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3115 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3116 } 3117 3118 *isseq = iscol_local; 3119 PetscFunctionReturn(0); 3120 } 3121 3122 /* 3123 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3124 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3125 3126 Input Parameters: 3127 mat - matrix 3128 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3129 i.e., mat->rstart <= isrow[i] < mat->rend 3130 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3131 i.e., mat->cstart <= iscol[i] < mat->cend 3132 Output Parameter: 3133 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3134 iscol_o - sequential column index set for retrieving mat->B 3135 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3136 */ 3137 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3138 { 3139 PetscErrorCode ierr; 3140 Vec x,cmap; 3141 const PetscInt *is_idx; 3142 PetscScalar *xarray,*cmaparray; 3143 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3144 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3145 Mat B=a->B; 3146 Vec lvec=a->lvec,lcmap; 3147 PetscInt i,cstart,cend,Bn=B->cmap->N; 3148 MPI_Comm comm; 3149 VecScatter Mvctx=a->Mvctx; 3150 3151 PetscFunctionBegin; 3152 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3153 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3154 3155 /* (1) iscol is a 
sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3156 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3157 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3158 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3159 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3160 3161 /* Get start indices */ 3162 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3163 isstart -= ncols; 3164 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3165 3166 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3167 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3168 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3169 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3170 for (i=0; i<ncols; i++) { 3171 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3172 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3173 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3174 } 3175 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3176 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3177 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3178 3179 /* Get iscol_d */ 3180 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3181 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3182 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3183 3184 /* Get isrow_d */ 3185 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3186 rstart = mat->rmap->rstart; 3187 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3188 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3189 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3190 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3191 3192 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3193 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3194 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3195 3196 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3197 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3198 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3199 3200 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3201 3202 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3203 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3204 3205 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3206 /* off-process column indices */ 3207 count = 0; 3208 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3209 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3210 3211 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3212 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3213 for (i=0; i<Bn; i++) { 3214 if (PetscRealPart(xarray[i]) > -1.0) { 3215 idx[count] = i; /* local column index in off-diagonal part B */ 3216 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3217 count++; 3218 } 3219 } 3220 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3221 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3222 3223 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3224 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3225 3226 ierr = PetscFree(idx);CHKERRQ(ierr); 3227 *garray = cmap1; 3228 3229 ierr = VecDestroy(&x);CHKERRQ(ierr); 3230 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3231 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3232 PetscFunctionReturn(0); 3233 } 3234 3235 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3236 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3237 { 3238 PetscErrorCode ierr; 3239 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3240 Mat M = NULL; 3241 MPI_Comm comm; 3242 IS iscol_d,isrow_d,iscol_o; 3243 Mat Asub = NULL,Bsub = NULL; 3244 PetscInt n; 3245 3246 PetscFunctionBegin; 3247 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3248 3249 if (call == MAT_REUSE_MATRIX) { 3250 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3251 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3252 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3253 3254 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3255 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3256 3257 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3258 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3259 3260 /* Update diagonal and off-diagonal portions of submat */ 3261 asub = (Mat_MPIAIJ*)(*submat)->data; 3262 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3263 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3264 if (n) { 3265 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3266 } 3267 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3268 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3269 3270 } else { /* call == MAT_INITIAL_MATRIX) */ 3271 const PetscInt *garray; 3272 PetscInt BsubN; 3273 3274 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
  */
3275     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3276
3277     /* Create local submatrices Asub and Bsub */
3278     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3279     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3280
3281     /* Create submatrix M */
3282     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3283
3284     /* If Bsub has empty columns, compress iscol_o so that it will retrieve the condensed Bsub from a->B during reuse */
3285     asub = (Mat_MPIAIJ*)M->data;
3286
3287     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3288     n    = asub->B->cmap->N;
3289     if (BsubN > n) {
3290       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3291       const PetscInt *idx;
3292       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3293       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3294
3295       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3296       j    = 0;
3297       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3298       for (i=0; i<n; i++) {
3299         if (j >= BsubN) break;
3300         while (subgarray[i] > garray[j]) j++;
3301
3302         if (subgarray[i] == garray[j]) {
3303           idx_new[i] = idx[j++];
3304         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3305       }
3306       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3307
3308       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3309       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3310
3311     } else if (BsubN < n) {
3312       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be fewer than columns of B (%D)",BsubN,asub->B->cmap->N);
3313     }
3314
3315     ierr = PetscFree(garray);CHKERRQ(ierr);
3316     *submat = M;
3317
3318     /* Save isrow_d, iscol_d and iscol_o, to be reused on this process for the next request */
3319     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3320     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3321
3322     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3323     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3324
3325     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3326     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3327   }
3328   PetscFunctionReturn(0);
3329 }
3330
3331 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3332 {
3333   PetscErrorCode ierr;
3334   IS             iscol_local=NULL,isrow_d;
3335   PetscInt       csize;
3336   PetscInt       n,i,j,start,end;
3337   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3338   MPI_Comm       comm;
3339
3340   PetscFunctionBegin;
3341   /* If isrow has the same processor distribution as mat,
3342      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with the global size of iscol */
3343   if (call == MAT_REUSE_MATRIX) {
3344     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3345     if (isrow_d) {
3346       sameRowDist  = PETSC_TRUE;
3347       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3348     } else {
3349       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3350       if (iscol_local) {
3351         sameRowDist  = PETSC_TRUE;
3352         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3353       }
3354     }
3355   } else {
3356     /* Check if isrow has the same processor distribution as mat */
3357     sameDist[0]
= PETSC_FALSE; 3358 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3359 if (!n) { 3360 sameDist[0] = PETSC_TRUE; 3361 } else { 3362 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3363 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3364 if (i >= start && j < end) { 3365 sameDist[0] = PETSC_TRUE; 3366 } 3367 } 3368 3369 /* Check if iscol has same processor distribution as mat */ 3370 sameDist[1] = PETSC_FALSE; 3371 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3372 if (!n) { 3373 sameDist[1] = PETSC_TRUE; 3374 } else { 3375 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3376 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3377 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3378 } 3379 3380 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3381 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3382 sameRowDist = tsameDist[0]; 3383 } 3384 3385 if (sameRowDist) { 3386 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3387 /* isrow and iscol have same processor distribution as mat */ 3388 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3389 PetscFunctionReturn(0); 3390 } else { /* sameRowDist */ 3391 /* isrow has same processor distribution as mat */ 3392 if (call == MAT_INITIAL_MATRIX) { 3393 PetscBool sorted; 3394 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3395 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3396 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3397 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3398 3399 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3400 if (sorted) { 3401 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3402 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3403 PetscFunctionReturn(0); 3404 } 3405 } else { /* call == MAT_REUSE_MATRIX */ 3406 IS iscol_sub; 3407 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3408 if (iscol_sub) { 3409 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3410 PetscFunctionReturn(0); 3411 } 3412 } 3413 } 3414 } 3415 3416 /* General case: iscol -> iscol_local which has global size of iscol */ 3417 if (call == MAT_REUSE_MATRIX) { 3418 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3419 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3420 } else { 3421 if (!iscol_local) { 3422 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3423 } 3424 } 3425 3426 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3427 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3428 3429 if (call == MAT_INITIAL_MATRIX) { 3430 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3431 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3432 } 3433 PetscFunctionReturn(0); 3434 } 3435 3436 /*@C 3437 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3438 and "off-diagonal" part of the matrix in CSR format. 3439 3440 Collective on MPI_Comm 3441 3442 Input Parameters: 3443 + comm - MPI communicator 3444 . 
A - "diagonal" portion of matrix 3445 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3446 - garray - global index of B columns 3447 3448 Output Parameter: 3449 . mat - the matrix, with input A as its local diagonal matrix 3450 Level: advanced 3451 3452 Notes: 3453 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3454 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3455 3456 .seealso: MatCreateMPIAIJWithSplitArrays() 3457 @*/ 3458 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3459 { 3460 PetscErrorCode ierr; 3461 Mat_MPIAIJ *maij; 3462 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3463 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3464 PetscScalar *oa=b->a; 3465 Mat Bnew; 3466 PetscInt m,n,N; 3467 3468 PetscFunctionBegin; 3469 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3470 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3471 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3472 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3473 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3474 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3475 3476 /* Get global columns of mat */ 3477 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3478 3479 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3480 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3481 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3482 maij = (Mat_MPIAIJ*)(*mat)->data; 3483 3484 (*mat)->preallocated = PETSC_TRUE; 3485 3486 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3487 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3488 3489 /* Set A as diagonal portion of *mat */ 3490 maij->A = A; 3491 3492 nz = oi[m]; 3493 for (i=0; i<nz; i++) { 3494 col = oj[i]; 3495 oj[i] = garray[col]; 3496 } 3497 3498 /* Set Bnew as off-diagonal portion of *mat */ 3499 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3500 bnew = (Mat_SeqAIJ*)Bnew->data; 3501 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3502 maij->B = Bnew; 3503 3504 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3505 3506 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3507 b->free_a = PETSC_FALSE; 3508 b->free_ij = PETSC_FALSE; 3509 ierr = MatDestroy(&B);CHKERRQ(ierr); 3510 3511 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3512 bnew->free_a = PETSC_TRUE; 3513 bnew->free_ij = PETSC_TRUE; 3514 3515 /* condense columns of maij->B */ 3516 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3517 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3518 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3519 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3520 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3521 PetscFunctionReturn(0); 3522 } 3523 3524 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3525 
3526 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3527 { 3528 PetscErrorCode ierr; 3529 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3530 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3531 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3532 Mat M,Msub,B=a->B; 3533 MatScalar *aa; 3534 Mat_SeqAIJ *aij; 3535 PetscInt *garray = a->garray,*colsub,Ncols; 3536 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3537 IS iscol_sub,iscmap; 3538 const PetscInt *is_idx,*cmap; 3539 PetscBool allcolumns=PETSC_FALSE; 3540 MPI_Comm comm; 3541 3542 PetscFunctionBegin; 3543 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3544 3545 if (call == MAT_REUSE_MATRIX) { 3546 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3547 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3548 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3549 3550 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3551 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3552 3553 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3554 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3555 3556 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3557 3558 } else { /* call == MAT_INITIAL_MATRIX) */ 3559 PetscBool flg; 3560 3561 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3562 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3563 3564 /* (1) iscol -> nonscalable iscol_local */ 3565 /* Check for special case: each processor gets entire matrix columns */ 3566 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3567 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3568 if (allcolumns) { 3569 iscol_sub = iscol_local; 3570 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3571 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3572 3573 } else { 3574 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3575 PetscInt *idx,*cmap1,k; 3576 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3577 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3578 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3579 count = 0; 3580 k = 0; 3581 for (i=0; i<Ncols; i++) { 3582 j = is_idx[i]; 3583 if (j >= cstart && j < cend) { 3584 /* diagonal part of mat */ 3585 idx[count] = j; 3586 cmap1[count++] = i; /* column index in submat */ 3587 } else if (Bn) { 3588 /* off-diagonal part of mat */ 3589 if (j == garray[k]) { 3590 idx[count] = j; 3591 cmap1[count++] = i; /* column index in submat */ 3592 } else if (j > garray[k]) { 3593 while (j > garray[k] && k < Bn-1) k++; 3594 if (j == garray[k]) { 3595 idx[count] = j; 3596 cmap1[count++] = i; /* column index in submat */ 3597 } 3598 } 3599 } 3600 } 3601 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3602 3603 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3604 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3605 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3606 3607 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3608 } 3609 3610 /* (3) Create sequential Msub */ 3611 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3612 } 3613 3614 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3615 aij = (Mat_SeqAIJ*)(Msub)->data; 3616 ii = aij->i; 3617 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3618 3619 /* 3620 m - number of local rows 3621 Ncols - number of columns (same on all processors) 3622 rstart - first row in new global matrix generated 3623 */ 3624 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3625 3626 if (call == MAT_INITIAL_MATRIX) { 3627 /* (4) Create parallel newmat */ 3628 PetscMPIInt rank,size; 3629 PetscInt csize; 3630 3631 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3632 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3633 3634 /* 3635 Determine the number of non-zeros in the diagonal and off-diagonal 3636 portions of the matrix in order to do correct preallocation 3637 */ 3638 3639 /* first get start and end of "diagonal" columns */ 3640 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3641 if (csize == PETSC_DECIDE) { 3642 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3643 if (mglobal == Ncols) { /* square matrix */ 3644 nlocal = m; 3645 } else { 3646 nlocal = Ncols/size + ((Ncols % size) > rank); 3647 } 3648 } else { 3649 nlocal = csize; 3650 } 3651 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3652 rstart = rend - nlocal; 3653 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3654 3655 /* next, compute all the lengths */ 3656 jj = aij->j; 3657 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3658 olens = dlens + m; 3659 for (i=0; i<m; i++) { 3660 jend = ii[i+1] - ii[i]; 3661 olen = 0; 3662 dlen = 0; 3663 for (j=0; j<jend; j++) { 3664 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3665 else dlen++; 3666 jj++; 3667 } 3668 olens[i] = olen; 3669 dlens[i] = dlen; 3670 } 3671 3672 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3673 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3674 3675 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3676 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3677 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3678 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3679 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3680 ierr = PetscFree(dlens);CHKERRQ(ierr); 3681 3682 } else { /* call == MAT_REUSE_MATRIX */ 3683 M = *newmat; 3684 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3685 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3686 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3687 /* 3688 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3689 rather than the slower MatSetValues(). 3690 */ 3691 M->was_assembled = PETSC_TRUE; 3692 M->assembled = PETSC_FALSE; 3693 } 3694 3695 /* (5) Set values of Msub to *newmat */ 3696 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3697 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3698 3699 jj = aij->j; 3700 aa = aij->a; 3701 for (i=0; i<m; i++) { 3702 row = rstart + i; 3703 nz = ii[i+1] - ii[i]; 3704 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3705 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3706 jj += nz; aa += nz; 3707 } 3708 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3709 3710 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3711 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3712 3713 ierr = PetscFree(colsub);CHKERRQ(ierr); 3714 3715 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3716 if (call == MAT_INITIAL_MATRIX) { 3717 *newmat = M; 3718 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3719 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3720 3721 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3722 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3723 3724 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3725 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3726 3727 if (iscol_local) { 3728 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3729 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3730 } 3731 } 3732 PetscFunctionReturn(0); 3733 } 3734 3735 /* 3736 Not great since it makes two copies of the submatrix, first an SeqAIJ 3737 in local and then by concatenating the local matrices the end result. 3738 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3739 3740 Note: This requires a sequential iscol with all indices. 
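
    Such an iscol is typically produced as in ISGetSeqIS_Private() above, e.g. (a sketch)
       ISAllGather(iscol,&iscol_local);
    which gathers the parallel column IS into a single sequential IS on each process.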
3741 */ 3742 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3743 { 3744 PetscErrorCode ierr; 3745 PetscMPIInt rank,size; 3746 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3747 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3748 Mat M,Mreuse; 3749 MatScalar *aa,*vwork; 3750 MPI_Comm comm; 3751 Mat_SeqAIJ *aij; 3752 PetscBool colflag,allcolumns=PETSC_FALSE; 3753 3754 PetscFunctionBegin; 3755 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3756 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3757 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3758 3759 /* Check for special case: each processor gets entire matrix columns */ 3760 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3761 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3762 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3763 3764 if (call == MAT_REUSE_MATRIX) { 3765 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3766 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3767 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3768 } else { 3769 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3770 } 3771 3772 /* 3773 m - number of local rows 3774 n - number of columns (same on all processors) 3775 rstart - first row in new global matrix generated 3776 */ 3777 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3778 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3779 if (call == MAT_INITIAL_MATRIX) { 3780 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3781 ii = aij->i; 3782 jj = aij->j; 3783 3784 /* 3785 Determine the number of non-zeros in the diagonal and off-diagonal 3786 portions of the matrix in order to do correct preallocation 3787 */ 3788 3789 /* first get start and end of "diagonal" columns */ 3790 if (csize == PETSC_DECIDE) { 3791 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3792 if (mglobal == n) { /* square matrix */ 3793 nlocal = m; 3794 } else { 3795 nlocal = n/size + ((n % size) > rank); 3796 } 3797 } else { 3798 nlocal = csize; 3799 } 3800 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3801 rstart = rend - nlocal; 3802 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3803 3804 /* next, compute all the lengths */ 3805 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3806 olens = dlens + m; 3807 for (i=0; i<m; i++) { 3808 jend = ii[i+1] - ii[i]; 3809 olen = 0; 3810 dlen = 0; 3811 for (j=0; j<jend; j++) { 3812 if (*jj < rstart || *jj >= rend) olen++; 3813 else dlen++; 3814 jj++; 3815 } 3816 olens[i] = olen; 3817 dlens[i] = dlen; 3818 } 3819 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3820 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3821 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3822 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3823 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3824 ierr = PetscFree(dlens);CHKERRQ(ierr); 3825 } else { 3826 PetscInt ml,nl; 3827 3828 M = *newmat; 3829 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3830 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3831 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3832 /* 3833 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3834 rather than the slower MatSetValues(). 3835 */ 3836 M->was_assembled = PETSC_TRUE; 3837 M->assembled = PETSC_FALSE; 3838 } 3839 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3840 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3841 ii = aij->i; 3842 jj = aij->j; 3843 aa = aij->a; 3844 for (i=0; i<m; i++) { 3845 row = rstart + i; 3846 nz = ii[i+1] - ii[i]; 3847 cwork = jj; jj += nz; 3848 vwork = aa; aa += nz; 3849 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3850 } 3851 3852 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3853 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3854 *newmat = M; 3855 3856 /* save submatrix used in processor for next request */ 3857 if (call == MAT_INITIAL_MATRIX) { 3858 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3859 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3860 } 3861 PetscFunctionReturn(0); 3862 } 3863 3864 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3865 { 3866 PetscInt m,cstart, cend,j,nnz,i,d; 3867 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3868 const PetscInt *JJ; 3869 PetscScalar *values; 3870 PetscErrorCode ierr; 3871 PetscBool nooffprocentries; 3872 3873 PetscFunctionBegin; 3874 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3875 3876 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3877 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3878 m = B->rmap->n; 3879 cstart = B->cmap->rstart; 3880 cend = B->cmap->rend; 3881 rstart = B->rmap->rstart; 3882 3883 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3884 3885 #if defined(PETSC_USE_DEBUG) 3886 for (i=0; i<m && Ii; i++) { 3887 nnz = Ii[i+1]- Ii[i]; 3888 JJ = J + Ii[i]; 3889 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3890 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3891 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3892 } 3893 #endif 3894 3895 for (i=0; i<m && Ii; i++) { 3896 nnz = Ii[i+1]- Ii[i]; 3897 JJ = J + Ii[i]; 3898 nnz_max = PetscMax(nnz_max,nnz); 3899 d = 0; 3900 for (j=0; j<nnz; j++) { 3901 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3902 } 3903 d_nnz[i] = d; 3904 o_nnz[i] = nnz - d; 3905 } 3906 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3907 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3908 3909 if (v) values = (PetscScalar*)v; 3910 else { 3911 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3912 } 3913 3914 for (i=0; i<m && Ii; i++) { 3915 ii = i + rstart; 3916 nnz = Ii[i+1]- Ii[i]; 3917 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3918   }
3919   nooffprocentries    = B->nooffprocentries;
3920   B->nooffprocentries = PETSC_TRUE;
3921   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3922   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3923   B->nooffprocentries = nooffprocentries;
3924
3925   if (!v) {
3926     ierr = PetscFree(values);CHKERRQ(ierr);
3927   }
3928   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3929   PetscFunctionReturn(0);
3930 }
3931
3932 /*@
3933    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3934    (the default parallel PETSc format).
3935
3936    Collective on MPI_Comm
3937
3938    Input Parameters:
3939 +  B - the matrix
3940 .  i - the indices into j for the start of each local row (starts with zero)
3941 .  j - the column indices for each local row (starts with zero)
3942 -  v - optional values in the matrix
3943
3944    Level: developer
3945
3946    Notes:
3947        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3948      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3949      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3950
3951        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3952
3953        The format used for the sparse matrix input is equivalent to a
3954     row-major ordering, i.e., for the following matrix, the input data expected is
3955     as shown:
3956
3957 $        1 0 0
3958 $        2 0 3     P0
3959 $       -------
3960 $        4 5 6     P1
3961 $
3962 $     Process0 [P0]: rows_owned=[0,1]
3963 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3964 $        j =  {0,0,2}  [size = 3]
3965 $        v =  {1,2,3}  [size = 3]
3966 $
3967 $     Process1 [P1]: rows_owned=[2]
3968 $        i =  {0,3}    [size = nrow+1  = 1+1]
3969 $        j =  {0,1,2}  [size = 3]
3970 $        v =  {4,5,6}  [size = 3]
3971
3972 .keywords: matrix, aij, compressed row, sparse, parallel
3973
3974 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3975           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3976 @*/
3977 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
3978 {
3979   PetscErrorCode ierr;
3980
3981   PetscFunctionBegin;
3982   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3983   PetscFunctionReturn(0);
3984 }
3985
3986 /*@C
3987    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3988    (the default parallel PETSc format). For good matrix assembly performance
3989    the user should preallocate the matrix storage by setting the parameters
3990    d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3991    performance can be increased by more than a factor of 50.
3992
3993    Collective on MPI_Comm
3994
3995    Input Parameters:
3996 +  B - the matrix
3997 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3998            (same value is used for all local rows)
3999 .  d_nnz - array containing the number of nonzeros in the various rows of the
4000            DIAGONAL portion of the local submatrix (possibly different for each row)
4001            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4002            The size of this array is equal to the number of local rows, i.e., 'm'.
4003            For matrices that will be factored, you must leave room for (and set)
4004            the diagonal entry even if it is zero.
4005 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4006            submatrix (same value is used for all local rows).
4007 -  o_nnz - array containing the number of nonzeros in the various rows of the
4008            OFF-DIAGONAL portion of the local submatrix (possibly different for
4009            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4010            structure. The size of this array is equal to the number
4011            of local rows, i.e., 'm'.
4012
4013    If the *_nnz parameter is given then the *_nz parameter is ignored.
4014
4015    The AIJ format (also called the Yale sparse matrix format or
4016    compressed row storage (CSR)) is fully compatible with standard Fortran 77
4017    storage.  The stored row and column indices begin with zero.
4018    See Users-Manual: ch_mat for details.
4019
4020    The parallel matrix is partitioned such that the first m0 rows belong to
4021    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4022    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4023
4024    The DIAGONAL portion of the local submatrix of a processor can be defined
4025    as the submatrix which is obtained by extracting the part corresponding to
4026    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4027    first row that belongs to the processor, r2 is the last row belonging to
4028    this processor, and c1-c2 is the range of indices of the local part of a
4029    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4030    common case of a square matrix, the row and column ranges are the same and
4031    the DIAGONAL part is also square. The remaining portion of the local
4032    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
4033
4034    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4035
4036    You can call MatGetInfo() to get information on how effective the preallocation was;
4037    for example the fields mallocs, nz_allocated, nz_used, nz_unneeded;
4038    you can also run with the option -info and look for messages with the string
4039    malloc in them to see if additional memory allocation was needed.
4040
4041    Example usage:
4042
4043    Consider the following 8x8 matrix with 34 non-zero values, that is
4044    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4045    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4046    as follows:
4047
4048 .vb
4049             1  2  0  |  0  3  0  |  0  4
4050     Proc0   0  5  6  |  7  0  0  |  8  0
4051             9  0 10  | 11  0  0  | 12  0
4052     -------------------------------------
4053            13  0 14  | 15 16 17  |  0  0
4054     Proc1   0 18  0  | 19 20 21  |  0  0
4055             0  0  0  | 22 23  0  | 24  0
4056     -------------------------------------
4057     Proc2  25 26 27  |  0  0 28  | 29  0
4058            30  0  0  | 31 32 33  |  0 34
4059 .ve
4060
4061    This can be represented as a collection of submatrices as:
4062
4063 .vb
4064       A B C
4065       D E F
4066       G H I
4067 .ve
4068
4069    Where the submatrices A,B,C are owned by proc0, D,E,F are
4070    owned by proc1, G,H,I are owned by proc2.
4071
4072    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4073    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4074    The 'M','N' parameters are 8,8, and have the same values on all procs.
4075
4076    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4077    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4078    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
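
   With this partitioning, a sketch of the preallocation call made on every
   process (hypothetical variable names; the per-row counts are listed below) is
$      MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);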
4079    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4080    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4081    matrix, and [DF] as another SeqAIJ matrix.
4082
4083    When d_nz, o_nz parameters are specified, d_nz storage elements are
4084    allocated for every row of the local DIAGONAL submatrix, and o_nz
4085    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4086    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4087    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4088    In this case, the values of d_nz,o_nz are:
4089 .vb
4090      proc0 : d_nz = 2, o_nz = 2
4091      proc1 : d_nz = 3, o_nz = 2
4092      proc2 : d_nz = 1, o_nz = 4
4093 .ve
4094    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4095    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4096    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4097    34 values.
4098
4099    When d_nnz, o_nnz parameters are specified, the storage is specified
4100    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4101    In the above case the values for d_nnz,o_nnz are:
4102 .vb
4103      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4104      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4105      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4106 .ve
4107    Here the space allocated is the sum of all the above values, i.e., 34, and
4108    hence the preallocation is perfect.
4109
4110    Level: intermediate
4111
4112 .keywords: matrix, aij, compressed row, sparse, parallel
4113
4114 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4115           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4116 @*/
4117 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4118 {
4119   PetscErrorCode ierr;
4120
4121   PetscFunctionBegin;
4122   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4123   PetscValidType(B,1);
4124   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4125   PetscFunctionReturn(0);
4126 }
4127
4128 /*@
4129    MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the local rows
4130    in standard CSR format.
4131
4132    Collective on MPI_Comm
4133
4134    Input Parameters:
4135 +  comm - MPI communicator
4136 .  m - number of local rows (cannot be PETSC_DECIDE)
4137 .  n - This value should be the same as the local size used in creating the
4138        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4139        calculated if N is given). For square matrices n is almost always m.
4140 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4141 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4142 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4143 .  j - column indices
4144 -  a - matrix values
4145
4146    Output Parameter:
4147 .  mat - the matrix
4148
4149    Level: intermediate
4150
4151    Notes:
4152        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4153      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4154      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4155
4156        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
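
       For instance, using the process-0 arrays from the example below (a sketch;
     i, j, and v are the arrays listed under Process0), the call on that process would be
$      MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&mat);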
4157
4158        The format used for the sparse matrix input is equivalent to a
4159     row-major ordering, i.e., for the following matrix, the input data expected is
4160     as shown:
4161
4162 $        1 0 0
4163 $        2 0 3     P0
4164 $       -------
4165 $        4 5 6     P1
4166 $
4167 $     Process0 [P0]: rows_owned=[0,1]
4168 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4169 $        j =  {0,0,2}  [size = 3]
4170 $        v =  {1,2,3}  [size = 3]
4171 $
4172 $     Process1 [P1]: rows_owned=[2]
4173 $        i =  {0,3}    [size = nrow+1  = 1+1]
4174 $        j =  {0,1,2}  [size = 3]
4175 $        v =  {4,5,6}  [size = 3]
4176
4177 .keywords: matrix, aij, compressed row, sparse, parallel
4178
4179 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4180           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4181 @*/
4182 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4183 {
4184   PetscErrorCode ierr;
4185
4186   PetscFunctionBegin;
4187   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4188   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4189   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4190   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4191   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4192   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4193   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4194   PetscFunctionReturn(0);
4195 }
4196
4197 /*@C
4198    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4199    (the default parallel PETSc format).  For good matrix assembly performance
4200    the user should preallocate the matrix storage by setting the parameters
4201    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4202    performance can be increased by more than a factor of 50.
4203
4204    Collective on MPI_Comm
4205
4206    Input Parameters:
4207 +  comm - MPI communicator
4208 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4209        This value should be the same as the local size used in creating the
4210        y vector for the matrix-vector product y = Ax.
4211 .  n - This value should be the same as the local size used in creating the
4212        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4213        calculated if N is given). For square matrices n is almost always m.
4214 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4215 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4216 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4217            (same value is used for all local rows)
4218 .  d_nnz - array containing the number of nonzeros in the various rows of the
4219            DIAGONAL portion of the local submatrix (possibly different for each row)
4220            or NULL, if d_nz is used to specify the nonzero structure.
4221            The size of this array is equal to the number of local rows, i.e., 'm'.
4222 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4223            submatrix (same value is used for all local rows).
4224 -  o_nnz - array containing the number of nonzeros in the various rows of the
4225            OFF-DIAGONAL portion of the local submatrix (possibly different for
4226            each row) or NULL, if o_nz is used to specify the nonzero
4227            structure. The size of this array is equal to the number
4228            of local rows, i.e., 'm'.
4229
4230    Output Parameter:
4231 .  A - the matrix
4232
4233    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4234    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4235    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4236
4237    Notes:
4238    If the *_nnz parameter is given then the *_nz parameter is ignored.
4239
4240    The m,n,M,N parameters specify the size of the matrix, and its partitioning across
4241    processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4242    storage requirements for this matrix.
4243
4244    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4245    processor then it must be used on all processors that share the object for
4246    that argument.
4247
4248    The user MUST specify either the local or global matrix dimensions
4249    (possibly both).
4250
4251    The parallel matrix is partitioned across processors such that the
4252    first m0 rows belong to process 0, the next m1 rows belong to
4253    process 1, the next m2 rows belong to process 2, etc., where
4254    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4255    values corresponding to an [m x N] submatrix.
4256
4257    The columns are logically partitioned with the n0 columns belonging
4258    to the 0th partition, the next n1 columns belonging to the next
4259    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4260
4261    The DIAGONAL portion of the local submatrix on any given processor
4262    is the submatrix corresponding to the rows and columns m,n
4263    corresponding to the given processor, i.e., the diagonal matrix on
4264    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4265    etc. The remaining portion of the local submatrix [m x (N-n)]
4266    constitutes the OFF-DIAGONAL portion. The example below better
4267    illustrates this concept.
4268
4269    For a square global matrix we define each processor's diagonal portion
4270    to be its local rows and the corresponding columns (a square submatrix);
4271    each processor's off-diagonal portion encompasses the remainder of the
4272    local matrix (a rectangular submatrix).
4273
4274    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4275
4276    When calling this routine with a single process communicator, a matrix of
4277    type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4278    type of communicator, use the construction mechanism
4279 .vb
4280    MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4281 .ve
4282
4288    By default, this format uses inodes (identical nodes) when possible.
4289    We search for consecutive rows with the same nonzero structure, thereby
4290    reusing matrix information to achieve increased efficiency.
4291
4292    Options Database Keys:
4293 +  -mat_no_inode - Do not use inodes
4294 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4295
4298    Example usage:
4299
4300    Consider the following 8x8 matrix with 34 non-zero values, that is
4301    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4302    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4303    as follows:
4304
4305 .vb
4306             1  2  0  |  0  3  0  |  0  4
4307     Proc0   0  5  6  |  7  0  0  |  8  0
4308             9  0 10  | 11  0  0  | 12  0
4309     -------------------------------------
4310            13  0 14  | 15 16 17  |  0  0
4311     Proc1   0 18  0  | 19 20 21  |  0  0
4312             0  0  0  | 22 23  0  | 24  0
4313     -------------------------------------
4314     Proc2  25 26 27  |  0  0 28  | 29  0
4315            30  0  0  | 31 32 33  |  0 34
4316 .ve
4317
4318    This can be represented as a collection of submatrices as:
4319
4320 .vb
4321       A B C
4322       D E F
4323       G H I
4324 .ve
4325
4326    Where the submatrices A,B,C are owned by proc0, D,E,F are
4327    owned by proc1, G,H,I are owned by proc2.
4328
4329    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4330    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4331    The 'M','N' parameters are 8,8, and have the same values on all procs.
4332
4333    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4334    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4335    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4336    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4337    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4338    matrix, and [DF] as another SeqAIJ matrix.
4339
4340    When d_nz, o_nz parameters are specified, d_nz storage elements are
4341    allocated for every row of the local DIAGONAL submatrix, and o_nz
4342    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4343    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4344    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4345    In this case, the values of d_nz,o_nz are
4346 .vb
4347      proc0 : d_nz = 2, o_nz = 2
4348      proc1 : d_nz = 3, o_nz = 2
4349      proc2 : d_nz = 1, o_nz = 4
4350 .ve
4351    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4352    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4353    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4354    34 values.
4355
4356    When d_nnz, o_nnz parameters are specified, the storage is specified
4357    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4358    In the above case the values for d_nnz,o_nnz are
4359 .vb
4360      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4361      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4362      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4363 .ve
4364    Here the space allocated is the sum of all the above values, i.e., 34, and
4365    hence the preallocation is perfect.
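
   A sketch of the corresponding call for this example (hypothetical variable
   names; d_nnz and o_nnz hold the per-row values listed above) is
.vb
      MatCreateAIJ(PETSC_COMM_WORLD,m,n,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   with m = n = 3 on proc0 and proc1 and m = n = 2 on proc2, followed by
   MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd() to fill and assemble A.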
4366 4367 Level: intermediate 4368 4369 .keywords: matrix, aij, compressed row, sparse, parallel 4370 4371 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4372 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4373 @*/ 4374 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4375 { 4376 PetscErrorCode ierr; 4377 PetscMPIInt size; 4378 4379 PetscFunctionBegin; 4380 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4381 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4382 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4383 if (size > 1) { 4384 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4385 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4386 } else { 4387 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4388 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4389 } 4390 PetscFunctionReturn(0); 4391 } 4392 4393 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4394 { 4395 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4396 PetscBool flg; 4397 PetscErrorCode ierr; 4398 4399 PetscFunctionBegin; 4400 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4401 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4402 if (Ad) *Ad = a->A; 4403 if (Ao) *Ao = a->B; 4404 if (colmap) *colmap = a->garray; 4405 PetscFunctionReturn(0); 4406 } 4407 4408 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4409 { 4410 PetscErrorCode ierr; 4411 PetscInt m,N,i,rstart,nnz,Ii; 4412 PetscInt *indx; 4413 PetscScalar *values; 4414 4415 PetscFunctionBegin; 4416 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4417 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4418 PetscInt *dnz,*onz,sum,bs,cbs; 4419 4420 if (n == PETSC_DECIDE) { 4421 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4422 } 4423 /* Check sum(n) = N */ 4424 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4425 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4426 4427 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4428 rstart -= m; 4429 4430 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4431 for (i=0; i<m; i++) { 4432 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4433 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4434 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4435 } 4436 4437 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4438 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4439 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4440 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4441 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4442 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4443 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4444 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4445 } 4446 4447 /* numeric phase */ 4448 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4449 for (i=0; i<m; i++) { 4450 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4451 Ii = i + rstart; 4452 ierr = 
MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  /* leave room for the ".", up to ten digits of rank, and the terminating NULL */
  ierr = PetscMalloc1(len+12,&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  = (Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(&merge);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
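  /* len_r[] and len_ri[] will hold the lengths of the incoming j- and i-structure messages, matching len_s[] and len_si[] on the sending ranks */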
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (local to the receiving process)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++) {
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);

  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;
  MPI_Comm       comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a  = (Mat_SeqAIJ*)(mpimat->A)->data;
  b  = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    if (size == 1) {
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }

    ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns

   Not Collective

   Input Parameters:
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  if (!col) { /* attach global id of condensed columns */
    ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
  }
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  PetscErrorCode    ierr;
  Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *b_oth;
  VecScatter        ctx;
  MPI_Comm          comm;
  const PetscMPIInt *rprocs,*sprocs;
  const PetscInt    *srow,*rstarts,*sstarts;
  PetscInt          *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
  PetscInt          i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len;
  PetscScalar       *b_otha,*bufa,*bufA,*vals;
  MPI_Request       *rwaits = NULL,*swaits = NULL;
  MPI_Status        rstatus;
  PetscMPIInt       jj,size,tag,rank,nsends_mpi,nrecvs_mpi;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  if (size == 1) {
    if (startsj_s) *startsj_s = NULL; /* null the output arrays, not merely the local pointers */
    if (bufa_ptr)  *bufa_ptr  = NULL;
    *B_oth = NULL;
    PetscFunctionReturn(0);
  }

  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Scatter ctx already in use");
  ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
  ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
  ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
  ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /* post receives */
    if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
    }
    for (i=0; i<nsends; i++) {
      rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
    ierr = PetscFree(svalues);CHKERRQ(ierr);

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    ierr = PetscFree(rvalues);CHKERRQ(ierr);

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  =
MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5396 } 5397 5398 /* pack the outgoing message a-array */ 5399 if (nsends) k = sstarts[0]; 5400 for (i=0; i<nsends; i++) { 5401 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5402 bufA = bufa+sstartsj[i]; 5403 for (j=0; j<nrows; j++) { 5404 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5405 for (ll=0; ll<sbs; ll++) { 5406 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5407 for (l=0; l<ncols; l++) { 5408 *bufA++ = vals[l]; 5409 } 5410 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5411 } 5412 } 5413 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5414 } 5415 /* recvs and sends of a-array are completed */ 5416 i = nrecvs; 5417 while (i--) { 5418 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5419 } 5420 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5421 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5422 5423 if (scall == MAT_INITIAL_MATRIX) { 5424 /* put together the new matrix */ 5425 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5426 5427 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5428 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5429 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5430 b_oth->free_a = PETSC_TRUE; 5431 b_oth->free_ij = PETSC_TRUE; 5432 b_oth->nonew = 0; 5433 5434 ierr = PetscFree(bufj);CHKERRQ(ierr); 5435 if (!startsj_s || !bufa_ptr) { 5436 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5437 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5438 } else { 5439 *startsj_s = sstartsj; 5440 *startsj_r = rstartsj; 5441 *bufa_ptr = bufa; 5442 } 5443 } 5444 5445 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5446 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5447 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5448 PetscFunctionReturn(0); 5449 } 5450 5451 /*@C 5452 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5453 5454 Not Collective 5455 5456 Input Parameters: 5457 . A - The matrix in mpiaij format 5458 5459 Output Parameter: 5460 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5461 . 
colmap - A map from global column index to local index into lvec
-   multScatter - A scatter from the argument of a matrix-vector product to lvec

    Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);

/*
    Computes (B'*A')' since computing B*A directly is untenable

           n          p           p
        (      )   (      )    (      )
      m (  A   ) * n(  B  ) = m(  C   )
        (      )   (      )    (      )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr =
MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5545 5546 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5547 5548 *C = Cmat; 5549 PetscFunctionReturn(0); 5550 } 5551 5552 /* ----------------------------------------------------------------*/ 5553 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5554 { 5555 PetscErrorCode ierr; 5556 5557 PetscFunctionBegin; 5558 if (scall == MAT_INITIAL_MATRIX) { 5559 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5560 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5561 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5562 } 5563 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5564 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5565 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5566 PetscFunctionReturn(0); 5567 } 5568 5569 /*MC 5570 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5571 5572 Options Database Keys: 5573 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5574 5575 Level: beginner 5576 5577 .seealso: MatCreateAIJ() 5578 M*/ 5579 5580 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5581 { 5582 Mat_MPIAIJ *b; 5583 PetscErrorCode ierr; 5584 PetscMPIInt size; 5585 5586 PetscFunctionBegin; 5587 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5588 5589 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5590 B->data = (void*)b; 5591 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5592 B->assembled = PETSC_FALSE; 5593 B->insertmode = NOT_SET_VALUES; 5594 b->size = size; 5595 5596 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5597 5598 /* build cache for off array entries formed */ 5599 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5600 5601 b->donotstash = PETSC_FALSE; 5602 b->colmap = 0; 5603 b->garray = 0; 5604 b->roworiented = PETSC_TRUE; 5605 5606 /* stuff used for matrix vector multiply */ 5607 b->lvec = NULL; 5608 b->Mvctx = NULL; 5609 5610 /* stuff for MatGetRow() */ 5611 b->rowindices = 0; 5612 b->rowvalues = 0; 5613 b->getrowactive = PETSC_FALSE; 5614 5615 /* flexible pointer used in CUSP/CUSPARSE classes */ 5616 b->spptr = NULL; 5617 5618 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5619 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5620 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5621 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5622 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5623 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5624 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5625 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5626 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5627 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 5628 #if defined(PETSC_HAVE_MKL_SPARSE) 5629 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5630 #endif 5631 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5632 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5633 #if defined(PETSC_HAVE_ELEMENTAL) 5634 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5635 #endif 5636 #if defined(PETSC_HAVE_HYPRE) 5637 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5638 #endif 5639 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5640 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5641 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5642 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5643 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5644 #if defined(PETSC_HAVE_HYPRE) 5645 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5646 #endif 5647 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5648 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5649 PetscFunctionReturn(0); 5650 } 5651 5652 /*@C 5653 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5654 and "off-diagonal" part of the matrix in CSR format. 5655 5656 Collective on MPI_Comm 5657 5658 Input Parameters: 5659 + comm - MPI communicator 5660 . m - number of local rows (Cannot be PETSC_DECIDE) 5661 . n - This value should be the same as the local size used in creating the 5662 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5663 calculated if N is given) For square matrices n is almost always m. 5664 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5665 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5666 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 5667 . j - column indices 5668 . a - matrix values 5669 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 5670 . oj - column indices 5671 - oa - matrix values 5672 5673 Output Parameter: 5674 . mat - the matrix 5675 5676 Level: advanced 5677 5678 Notes: 5679 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5680 must free the arrays once the matrix has been destroyed and not before. 
5681 5682 The i and j indices are 0 based 5683 5684 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5685 5686 This sets local rows and cannot be used to set off-processor values. 5687 5688 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5689 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5690 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5691 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5692 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5693 communication if it is known that only local entries will be set. 5694 5695 .keywords: matrix, aij, compressed row, sparse, parallel 5696 5697 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5698 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5699 @*/ 5700 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5701 { 5702 PetscErrorCode ierr; 5703 Mat_MPIAIJ *maij; 5704 5705 PetscFunctionBegin; 5706 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5707 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5708 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5709 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5710 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5711 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5712 maij = (Mat_MPIAIJ*) (*mat)->data; 5713 5714 (*mat)->preallocated = PETSC_TRUE; 5715 5716 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5717 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5718 5719 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5720 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5721 5722 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5723 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5724 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5725 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5726 5727 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5728 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5729 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5730 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5731 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5732 PetscFunctionReturn(0); 5733 } 5734 5735 /* 5736 Special version for direct calls from Fortran 5737 */ 5738 #include <petsc/private/fortranimpl.h> 5739 5740 /* Change these macros so can be used in void function */ 5741 #undef CHKERRQ 5742 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5743 #undef SETERRQ2 5744 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5745 #undef SETERRQ3 5746 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5747 #undef SETERRQ 5748 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5749 
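/* Select the Fortran binding symbol spelling that matches the compiler's name-mangling convention */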
/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 &&
                  !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                /* New off-process column: convert the matrix back to an unassembled state */
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   ba must be refreshed from the new b before ap2 is computed */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* Off-process row: stash the values for communication during assembly */
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
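/*
   How the symbol above is reached, as a hypothetical sketch rather than a prescribed
   interface: the name-mangling macros select matsetvaluesmpiaij_, matsetvaluesmpiaij,
   or MATSETVALUESMPIAIJ to match the Fortran compiler's convention, and a Fortran
   caller passes every argument by reference. An equivalent direct call written in C,
   mimicking what a Fortran caller generates for a single entry:

     Mat            A;                  an existing, preallocated MATMPIAIJ matrix
     PetscInt       one = 1, row = 0, col = 0;
     PetscScalar    val = 1.0;
     InsertMode     mode = ADD_VALUES;
     PetscErrorCode ierr;

     matsetvaluesmpiaij_(&A,&one,&row,&one,&col,&val,&mode,&ierr);

   Because CHKERRQ and SETERRQ are redefined above to abort, errors in this void
   function terminate the program instead of returning an error code; presumably the
   direct binding exists to avoid per-call dispatch overhead on the generic
   MatSetValues() path.
*/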