
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
   enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

/* Propagates the requested block sizes to the local diagonal (A) and off-diagonal (B)
   sequential blocks; B always keeps a column block size of 1 because its columns are
   the compressed set of off-process columns. */
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Builds an IS (in global row numbering) of the locally owned rows that contain at
   least one stored nonzero value in either the diagonal or off-diagonal block.
   If no process has an empty/all-zero row, *keptrows is left NULL (early return). */
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  /* first pass: count locally "zero" rows (structurally empty or all stored values zero) */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  /* second pass: record the global indices of the rows being kept */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Inserts/adds the vector D onto the diagonal of Y; uses the fast path through the
   local diagonal block when Y is assembled with congruent row/column layouts. */
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Builds an IS (global numbering) of locally owned rows whose diagonal entry is
   missing or zero; the search is delegated to the sequential diagonal block. */
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  /* shift local row indices to global numbering */
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Computes the 1-, 2-, or infinity-norm of every global column.  Each process
   accumulates its local contributions into a length-N work array (off-diagonal
   entries are mapped to global columns through garray), then the contributions
   are combined with a SUM (or MAX for the infinity norm) reduction. */
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

/* Builds an IS of locally owned rows that have an entry outside the block diagonal:
   the union of the diagonal block's off-block-diagonal rows and the nonzero rows of
   the off-diagonal block, sorted with duplicates removed, shifted to global numbering. */
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  /* merge both index lists, then sort and deduplicate */
  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* only rank 0 holds the global matrix; validate its type there */
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    /* prefix-sum the local row counts to get the ownership ranges */
    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number of diagonal and off-diagonal counts; ld[i] counts entries
         strictly left of the diagonal block in row i (needed for value transfer on reuse) */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    /* insert the locally owned rows one at a time */
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat;
       within each row the received buffer holds: left-of-diagonal (ld[i]) off-diagonal
       values, then the diagonal-block values, then the remaining off-diagonal values */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* table stores (global column + 1) -> (local column + 1); the +1 avoids the
     reserved zero key/value of PetscTable */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* dense array over all global columns; entry 0 means "not present locally" */
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/* Inserts (row,col,value) into the diagonal block.  Relies on the caller
   (MatSetValues_MPIAIJ) having set up rp1/ap1/low1/high1/nrow1/rmax1/lastcol1
   and the a/ai/aj/aa/aimax/ailen/nonew/ignorezeroentries locals. */
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col;\
    /* binary-search narrowing, then linear scan for the column */ \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

/* Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block,
   using the rp2/ap2/low2/high2/nrow2/rmax2/lastcol2 and b* locals. */
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

/* Overwrites a full locally owned row with the values v, which are given in
   global column order: entries left of the diagonal block land at the front of
   the B row, then the A row, then the rest of the B row. */
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Inserts/adds an m-by-n logically dense block of values, routing each entry to
   the diagonal block A, the off-diagonal block B (with colmap translation once
   assembled), or the off-process stash. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A     = aij->A;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa   = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B     = aij->B;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba   = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: prime the per-row search state used by the macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            /* col < 0 means this global column is new to B: disassemble (if allowed)
               so B switches back to global column numbering for further insertion */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      /* off-process row: stash it for communication during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij    = (Mat_MPIAIJ*)mat->data;
  Mat        A       = aij->A; /* diagonal part of the matrix */
  Mat        B       = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b      = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart  = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen  = a->ilen,*aj = a->j;
  PetscInt   *bilen  = b->ilen,*bj = b->j;
  PetscInt   am      = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij   = (Mat_MPIAIJ*)mat->data;
  Mat         A      = aij->A; /* diagonal part of the matrix */
  Mat         B      = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b     = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am     = aij->A->rmap->n,j;
  PetscInt    *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

/* Retrieves values at (idxm[i],idxn[j]); only locally owned rows are supported.
   Columns not present in the local off-diagonal block read back as 0.0. */
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

/* Begins the (possibly communication-bearing) assembly: scatters stashed
   off-process entries toward their owners unless stashing is disabled. */
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Finishes assembly: drains the stash into the local blocks, assembles A and B,
   handles collective disassembly, sets up the off-process multiply machinery on
   first final assembly, and synchronizes the global nonzero state. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Zeros all stored values in both local blocks, keeping the nonzero structure. */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA =
aijA->keepnonzeropattern; 904 nnzB = aijB->keepnonzeropattern; 905 if (!nnzA) { 906 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 907 aijA->nonew = 0; 908 } 909 if (!nnzB) { 910 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 911 aijB->nonew = 0; 912 } 913 /* Must zero here before the next loop */ 914 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 915 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 916 for (r = 0; r < len; ++r) { 917 const PetscInt row = lrows[r] + A->rmap->rstart; 918 if (row >= A->cmap->N) continue; 919 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 920 } 921 aijA->nonew = nnwA; 922 aijB->nonew = nnwB; 923 } else { 924 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 925 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 926 } 927 ierr = PetscFree(lrows);CHKERRQ(ierr); 928 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 929 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 930 931 /* reduce nonzerostate */ 932 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 933 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 934 if (gch) A->nonzerostate++; 935 PetscFunctionReturn(0); 936 } 937 938 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 939 { 940 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 941 PetscErrorCode ierr; 942 PetscMPIInt n = A->rmap->n; 943 PetscInt i,j,r,m,p = 0,len = 0; 944 PetscInt *lrows,*owners = A->rmap->range; 945 PetscSFNode *rrows; 946 PetscSF sf; 947 const PetscScalar *xx; 948 PetscScalar *bb,*mask; 949 Vec xmask,lmask; 950 Mat_SeqAIJ *aij = 
(Mat_SeqAIJ*)l->B->data; 951 const PetscInt *aj, *ii,*ridx; 952 PetscScalar *aa; 953 954 PetscFunctionBegin; 955 /* Create SF where leaves are input rows and roots are owned rows */ 956 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 957 for (r = 0; r < n; ++r) lrows[r] = -1; 958 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 959 for (r = 0; r < N; ++r) { 960 const PetscInt idx = rows[r]; 961 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 962 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 963 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 964 } 965 rrows[r].rank = p; 966 rrows[r].index = rows[r] - owners[p]; 967 } 968 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 969 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 970 /* Collect flags for rows to be zeroed */ 971 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 972 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 973 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 974 /* Compress and put in row numbers */ 975 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 976 /* zero diagonal part of matrix */ 977 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 978 /* handle off diagonal part of matrix */ 979 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 980 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 981 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 982 for (i=0; i<len; i++) bb[lrows[i]] = 1; 983 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 984 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 985 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 986 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 987 if (x && b) { /* 
this code is buggy when the row and column layout don't match */ 988 PetscBool cong; 989 990 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 991 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 992 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 993 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 994 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 995 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 996 } 997 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 998 /* remove zeroed rows of off diagonal matrix */ 999 ii = aij->i; 1000 for (i=0; i<len; i++) { 1001 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1002 } 1003 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1004 if (aij->compressedrow.use) { 1005 m = aij->compressedrow.nrows; 1006 ii = aij->compressedrow.i; 1007 ridx = aij->compressedrow.rindex; 1008 for (i=0; i<m; i++) { 1009 n = ii[i+1] - ii[i]; 1010 aj = aij->j + ii[i]; 1011 aa = aij->a + ii[i]; 1012 1013 for (j=0; j<n; j++) { 1014 if (PetscAbsScalar(mask[*aj])) { 1015 if (b) bb[*ridx] -= *aa*xx[*aj]; 1016 *aa = 0.0; 1017 } 1018 aa++; 1019 aj++; 1020 } 1021 ridx++; 1022 } 1023 } else { /* do not use compressed row format */ 1024 m = l->B->rmap->n; 1025 for (i=0; i<m; i++) { 1026 n = ii[i+1] - ii[i]; 1027 aj = aij->j + ii[i]; 1028 aa = aij->a + ii[i]; 1029 for (j=0; j<n; j++) { 1030 if (PetscAbsScalar(mask[*aj])) { 1031 if (b) bb[i] -= *aa*xx[*aj]; 1032 *aa = 0.0; 1033 } 1034 aa++; 1035 aj++; 1036 } 1037 } 1038 } 1039 if (x && b) { 1040 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1041 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1042 } 1043 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1044 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1045 ierr = PetscFree(lrows);CHKERRQ(ierr); 1046 1047 /* only change matrix nonzero state if 
pattern was allowed to be changed */ 1048 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1049 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1050 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1051 } 1052 PetscFunctionReturn(0); 1053 } 1054 1055 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1056 { 1057 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1058 PetscErrorCode ierr; 1059 PetscInt nt; 1060 VecScatter Mvctx = a->Mvctx; 1061 1062 PetscFunctionBegin; 1063 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1064 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1065 1066 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1067 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1068 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1069 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1070 PetscFunctionReturn(0); 1071 } 1072 1073 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1074 { 1075 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1076 PetscErrorCode ierr; 1077 1078 PetscFunctionBegin; 1079 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1080 PetscFunctionReturn(0); 1081 } 1082 1083 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1084 { 1085 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1086 PetscErrorCode ierr; 1087 VecScatter Mvctx = a->Mvctx; 1088 1089 PetscFunctionBegin; 1090 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1091 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1092 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1093 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1094 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1095 PetscFunctionReturn(0); 1096 } 1097 1098 PetscErrorCode 
MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatIsTranspose_MPIAIJ - Tests whether Bmat equals the transpose of Amat to
   tolerance tol. First a cheap collective test on the diagonal blocks; only if
   that passes (and size > 1) are the off-diagonal parts gathered with
   MatCreateSubMatrices and compared.

   NOTE(review): the 'notme' array is allocated with N-last+first entries but the
   fill loops use M; the two agree only when M == N (square matrices) — confirm
   rectangular input is not expected here.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data;
  Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatIsSymmetric_MPIAIJ - Symmetry test implemented as "is A its own transpose". */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatMultTransposeAdd_MPIAIJ - z = y + A^T*x, accumulating remote contributions
   with a reverse scatter. */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatScale_MPIAIJ - A = aa*A, applied to both local blocks. */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatDestroy_MPIAIJ - Releases everything owned by the MPIAIJ implementation:
   stash, blocks A and B, the column map, garray, ghost vector and scatters,
   cached row work arrays, and finally the data structure itself, then clears
   the composed function slots.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr =
PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* detach all composed implementation-specific functions */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatView_MPIAIJ_Binary - Writes the parallel matrix to a binary viewer in the
   PETSc binary matrix format (header, row lengths, column indices, values),
   funneling all data through rank 0 under viewer flow control. Within each row
   the global columns are emitted in ascending order by interleaving the
   off-diagonal (B, via garray) and diagonal (A, shifted by cstart) parts.

   Collective.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    /* header[3] is the global nonzero count, reduced onto rank 0 */
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* rank 0 needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    /* off-diagonal columns left of the diagonal block first ... */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    /* ... then the diagonal block shifted to global numbering ... */
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    /* ... then the remaining off-diagonal columns to the right */
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      /* NOTE(review): this message prints nz, not the offending rnz — confirm intended */
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values (same interleaving order as the indices) */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
/*
   MatView_MPIAIJ_ASCIIorDraworSocket - Dispatches viewing: special-cased ASCII
   formats (load balance, info, detailed info) and the binary path are handled
   directly; otherwise the whole matrix is gathered onto rank 0 and viewed there.

   Collective.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
    ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat *AA, A = NULL, Av;
      IS  isrow,iscol;

      ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
      ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
      if (!rank) {
         ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
         A    = AA[0];
         Av   = AA[0];
      }
      ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
    */
    ierr = ISDestroy(&iscol);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      if (((PetscObject)mat)->name) {
        ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
      }
      ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* MatView_MPIAIJ - Entry point for viewing; supported viewer types share one handler. */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatSOR_MPIAIJ - Block Jacobi / local SOR relaxation: each outer iteration
   scatters the current iterate's ghost values, subtracts the off-diagonal
   contribution from the right-hand side, and runs the requested SOR sweep on
   the local diagonal block. Also implements the Eisenstat trick variant.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ
*mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;    /* work vector: rhs with off-diagonal part removed */
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is needed whenever more than the first (zero-guess) sweep will run */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the off-diagonal contribution vanishes; sweep directly */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* cache the diagonal; destroyed at assembly so it cannot go stale */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

/*
   MatPermute_MPIAIJ - B = P*A*Q for row permutation rowp and column permutation
   colp, using star forests to invert the (possibly parallel) permutations.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr =
ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* Count diagonal/off-diagonal nonzeros per (unpermuted) local row, then
     broadcast the counts to the rows' destination ranks for preallocation */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}

/*
   MatGetGhosts_MPIAIJ - Returns the number of ghost (off-process) columns and,
   optionally, a borrowed pointer to their global indices (aij->garray).
   The caller must NOT free *ghosts.
*/
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}
/*
   MatGetInfo_MPIAIJ - Collects matrix statistics (nonzeros, memory, mallocs)
   by summing the diagonal (A) and off-diagonal (B) blocks, then reducing
   across the communicator with MAX or SUM depending on the requested flag.
*/
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  /* accumulate the off-diagonal block's contribution on top of A's */
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
   MatSetOption_MPIAIJ - Forwards most options to both sequential blocks
   (A and B); stash- and symmetry-related options are handled locally or
   upstream in MatSetOption().
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/*
   MatGetRow_MPIAIJ - Returns one locally owned row, merging the diagonal (A)
   and off-diagonal (B) blocks into a single column-sorted row.  Work arrays
   (rowvalues/rowindices) are allocated lazily, sized for the longest row.
   Only rows in [rstart,rend) may be requested; pair with MatRestoreRow.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space
to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* pass NULL sub-requests to the blocks when the caller wants neither values nor indices */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global column precedes the diagonal block come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatRestoreRow_MPIAIJ - Ends a MatGetRow() access; only clears the
   getrowactive guard (the work arrays are cached for reuse).
*/
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*
   MatNorm_MPIAIJ - Frobenius, 1- and infinity-norms of a parallel AIJ matrix.
   The 2-norm is not supported.  Single-process matrices defer to the
   sequential implementation.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both blocks, then a global SUM and sqrt */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr  = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      /* accumulate per-global-column absolute sums, A block then B block */
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

/*
   MatTranspose_MPIAIJ - Transposes a parallel AIJ matrix.  Preallocation for
   the result is computed with PetscSF reductions; the diagonal block is
   transposed locally and the off-diagonal block is moved with MatSetValues.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a  =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt       M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
  PetscErrorCode ierr;
  Mat            B,A_diag,*B_diag;
  MatScalar      *array;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);

  /* copy over the B part */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* each local row of B becomes (part of) a global column of the transpose */
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   MatDiagonalScale_MPIAIJ - Computes diag(ll)*A*diag(rr).  The scatter of rr
   into the ghosted vector is overlapped with the left scaling and the scaling
   of the diagonal block.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* MatSetUnfactored_MPIAIJ - Clears the factored state of the diagonal block. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatEqual_MPIAIJ - Compares A and B blockwise on each process, then reduces
   with a logical AND so every rank returns the same global answer.
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatCopy_MPIAIJ - Copies A into B; uses the fast blockwise path only when the
   nonzero patterns match and both matrices share the same copy implementation.
*/
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* MatSetUp_MPIAIJ - Default setup: preallocate with default parameters. */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
/*
   MatAXPYGetPreallocation_MPIX_private - Merge-counts, per row, the union of
   the column patterns of X and Y (CSR arrays xi/xj, yi/yj with the given
   local-to-global column maps).  nnz[i] receives the union size for row i.
   Assumes each row's columns are sorted in both inputs.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx    = xi[i+1] - xi[i];
    nzy    = yi[i+1] - yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       m = Y->rmap->N;
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatAXPY_MPIAIJ - Y = a*X + Y.  With identical nonzero patterns the value
   arrays are combined directly with BLAS axpy; with a subset pattern the
   generic path is used; otherwise a new matrix with the union pattern is
   preallocated and replaces Y.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

/* MatConjugate_MPIAIJ - Complex-conjugates both blocks; no-op for real scalars. */
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

/* MatRealPart_MPIAIJ - Replaces both blocks by their real parts. */
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =
MatRealPart(a->A);CHKERRQ(ierr); 2284 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2285 PetscFunctionReturn(0); 2286 } 2287 2288 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2289 { 2290 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2291 PetscErrorCode ierr; 2292 2293 PetscFunctionBegin; 2294 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2295 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2296 PetscFunctionReturn(0); 2297 } 2298 2299 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2300 { 2301 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2302 PetscErrorCode ierr; 2303 PetscInt i,*idxb = 0; 2304 PetscScalar *va,*vb; 2305 Vec vtmp; 2306 2307 PetscFunctionBegin; 2308 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2309 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2310 if (idx) { 2311 for (i=0; i<A->rmap->n; i++) { 2312 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2313 } 2314 } 2315 2316 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2317 if (idx) { 2318 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2319 } 2320 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2321 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2322 2323 for (i=0; i<A->rmap->n; i++) { 2324 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2325 va[i] = vb[i]; 2326 if (idx) idx[i] = a->garray[idxb[i]]; 2327 } 2328 } 2329 2330 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2331 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2332 ierr = PetscFree(idxb);CHKERRQ(ierr); 2333 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2334 PetscFunctionReturn(0); 2335 } 2336 2337 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2338 { 2339 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2340 PetscErrorCode ierr; 2341 PetscInt i,*idxb = 0; 2342 PetscScalar *va,*vb; 2343 Vec vtmp; 2344 2345 PetscFunctionBegin; 2346 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2347 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2348 if (idx) { 2349 for (i=0; i<A->cmap->n; i++) { 2350 if 
(PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2351 } 2352 } 2353 2354 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2355 if (idx) { 2356 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2357 } 2358 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2359 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2360 2361 for (i=0; i<A->rmap->n; i++) { 2362 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2363 va[i] = vb[i]; 2364 if (idx) idx[i] = a->garray[idxb[i]]; 2365 } 2366 } 2367 2368 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2369 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2370 ierr = PetscFree(idxb);CHKERRQ(ierr); 2371 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2372 PetscFunctionReturn(0); 2373 } 2374 2375 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2376 { 2377 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2378 PetscInt n = A->rmap->n; 2379 PetscInt cstart = A->cmap->rstart; 2380 PetscInt *cmap = mat->garray; 2381 PetscInt *diagIdx, *offdiagIdx; 2382 Vec diagV, offdiagV; 2383 PetscScalar *a, *diagA, *offdiagA; 2384 PetscInt r; 2385 PetscErrorCode ierr; 2386 2387 PetscFunctionBegin; 2388 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2389 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2390 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2391 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2392 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2393 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2394 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2395 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2396 for (r = 0; r < n; ++r) { 2397 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2398 a[r] = diagA[r]; 2399 idx[r] = cstart + diagIdx[r]; 2400 } else { 2401 a[r] = offdiagA[r]; 2402 idx[r] = cmap[offdiagIdx[r]]; 2403 } 2404 } 2405 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2406 ierr = 
VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2407 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2408 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2409 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2410 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2411 PetscFunctionReturn(0); 2412 } 2413 2414 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2415 { 2416 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2417 PetscInt n = A->rmap->n; 2418 PetscInt cstart = A->cmap->rstart; 2419 PetscInt *cmap = mat->garray; 2420 PetscInt *diagIdx, *offdiagIdx; 2421 Vec diagV, offdiagV; 2422 PetscScalar *a, *diagA, *offdiagA; 2423 PetscInt r; 2424 PetscErrorCode ierr; 2425 2426 PetscFunctionBegin; 2427 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2428 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2429 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2430 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2431 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2432 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2433 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2434 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2435 for (r = 0; r < n; ++r) { 2436 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2437 a[r] = diagA[r]; 2438 idx[r] = cstart + diagIdx[r]; 2439 } else { 2440 a[r] = offdiagA[r]; 2441 idx[r] = cmap[offdiagIdx[r]]; 2442 } 2443 } 2444 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2445 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2446 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2447 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2448 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2449 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2454 { 2455 PetscErrorCode ierr; 2456 Mat *dummy; 2457 2458 PetscFunctionBegin; 2459 ierr = 
MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2460 *newmat = *dummy; 2461 ierr = PetscFree(dummy);CHKERRQ(ierr); 2462 PetscFunctionReturn(0); 2463 } 2464 2465 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2466 { 2467 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2468 PetscErrorCode ierr; 2469 2470 PetscFunctionBegin; 2471 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2472 A->factorerrortype = a->A->factorerrortype; 2473 PetscFunctionReturn(0); 2474 } 2475 2476 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2477 { 2478 PetscErrorCode ierr; 2479 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2480 2481 PetscFunctionBegin; 2482 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2483 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2484 if (x->assembled) { 2485 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2486 } else { 2487 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2488 } 2489 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2490 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2491 PetscFunctionReturn(0); 2492 } 2493 2494 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2495 { 2496 PetscFunctionBegin; 2497 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2498 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2499 PetscFunctionReturn(0); 2500 } 2501 2502 /*@ 2503 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2504 2505 Collective on Mat 2506 2507 Input Parameters: 2508 + A - the matrix 2509 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2510 2511 Level: advanced 2512 
2513 @*/ 2514 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2515 { 2516 PetscErrorCode ierr; 2517 2518 PetscFunctionBegin; 2519 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2524 { 2525 PetscErrorCode ierr; 2526 PetscBool sc = PETSC_FALSE,flg; 2527 2528 PetscFunctionBegin; 2529 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2530 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2531 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2532 if (flg) { 2533 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2534 } 2535 ierr = PetscOptionsTail();CHKERRQ(ierr); 2536 PetscFunctionReturn(0); 2537 } 2538 2539 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2540 { 2541 PetscErrorCode ierr; 2542 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2543 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2544 2545 PetscFunctionBegin; 2546 if (!Y->preallocated) { 2547 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2548 } else if (!aij->nz) { 2549 PetscInt nonew = aij->nonew; 2550 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2551 aij->nonew = nonew; 2552 } 2553 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2554 PetscFunctionReturn(0); 2555 } 2556 2557 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2558 { 2559 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2560 PetscErrorCode ierr; 2561 2562 PetscFunctionBegin; 2563 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2564 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2565 if (d) { 2566 PetscInt rstart; 2567 ierr = 
MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2568 *d += rstart; 2569 2570 } 2571 PetscFunctionReturn(0); 2572 } 2573 2574 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2575 { 2576 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2577 PetscErrorCode ierr; 2578 2579 PetscFunctionBegin; 2580 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2581 PetscFunctionReturn(0); 2582 } 2583 2584 /* -------------------------------------------------------------------*/ 2585 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2586 MatGetRow_MPIAIJ, 2587 MatRestoreRow_MPIAIJ, 2588 MatMult_MPIAIJ, 2589 /* 4*/ MatMultAdd_MPIAIJ, 2590 MatMultTranspose_MPIAIJ, 2591 MatMultTransposeAdd_MPIAIJ, 2592 0, 2593 0, 2594 0, 2595 /*10*/ 0, 2596 0, 2597 0, 2598 MatSOR_MPIAIJ, 2599 MatTranspose_MPIAIJ, 2600 /*15*/ MatGetInfo_MPIAIJ, 2601 MatEqual_MPIAIJ, 2602 MatGetDiagonal_MPIAIJ, 2603 MatDiagonalScale_MPIAIJ, 2604 MatNorm_MPIAIJ, 2605 /*20*/ MatAssemblyBegin_MPIAIJ, 2606 MatAssemblyEnd_MPIAIJ, 2607 MatSetOption_MPIAIJ, 2608 MatZeroEntries_MPIAIJ, 2609 /*24*/ MatZeroRows_MPIAIJ, 2610 0, 2611 0, 2612 0, 2613 0, 2614 /*29*/ MatSetUp_MPIAIJ, 2615 0, 2616 0, 2617 MatGetDiagonalBlock_MPIAIJ, 2618 0, 2619 /*34*/ MatDuplicate_MPIAIJ, 2620 0, 2621 0, 2622 0, 2623 0, 2624 /*39*/ MatAXPY_MPIAIJ, 2625 MatCreateSubMatrices_MPIAIJ, 2626 MatIncreaseOverlap_MPIAIJ, 2627 MatGetValues_MPIAIJ, 2628 MatCopy_MPIAIJ, 2629 /*44*/ MatGetRowMax_MPIAIJ, 2630 MatScale_MPIAIJ, 2631 MatShift_MPIAIJ, 2632 MatDiagonalSet_MPIAIJ, 2633 MatZeroRowsColumns_MPIAIJ, 2634 /*49*/ MatSetRandom_MPIAIJ, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*54*/ MatFDColoringCreate_MPIXAIJ, 2640 0, 2641 MatSetUnfactored_MPIAIJ, 2642 MatPermute_MPIAIJ, 2643 0, 2644 /*59*/ MatCreateSubMatrix_MPIAIJ, 2645 MatDestroy_MPIAIJ, 2646 MatView_MPIAIJ, 2647 0, 2648 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2649 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 
2650 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2651 0, 2652 0, 2653 0, 2654 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2655 MatGetRowMinAbs_MPIAIJ, 2656 0, 2657 0, 2658 0, 2659 0, 2660 /*75*/ MatFDColoringApply_AIJ, 2661 MatSetFromOptions_MPIAIJ, 2662 0, 2663 0, 2664 MatFindZeroDiagonals_MPIAIJ, 2665 /*80*/ 0, 2666 0, 2667 0, 2668 /*83*/ MatLoad_MPIAIJ, 2669 MatIsSymmetric_MPIAIJ, 2670 0, 2671 0, 2672 0, 2673 0, 2674 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2675 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2676 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2677 MatPtAP_MPIAIJ_MPIAIJ, 2678 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2679 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2680 0, 2681 0, 2682 0, 2683 0, 2684 /*99*/ 0, 2685 0, 2686 0, 2687 MatConjugate_MPIAIJ, 2688 0, 2689 /*104*/MatSetValuesRow_MPIAIJ, 2690 MatRealPart_MPIAIJ, 2691 MatImaginaryPart_MPIAIJ, 2692 0, 2693 0, 2694 /*109*/0, 2695 0, 2696 MatGetRowMin_MPIAIJ, 2697 0, 2698 MatMissingDiagonal_MPIAIJ, 2699 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2700 0, 2701 MatGetGhosts_MPIAIJ, 2702 0, 2703 0, 2704 /*119*/0, 2705 0, 2706 0, 2707 0, 2708 MatGetMultiProcBlock_MPIAIJ, 2709 /*124*/MatFindNonzeroRows_MPIAIJ, 2710 MatGetColumnNorms_MPIAIJ, 2711 MatInvertBlockDiagonal_MPIAIJ, 2712 MatInvertVariableBlockDiagonal_MPIAIJ, 2713 MatCreateSubMatricesMPI_MPIAIJ, 2714 /*129*/0, 2715 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2716 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2717 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2718 0, 2719 /*134*/0, 2720 0, 2721 MatRARt_MPIAIJ_MPIAIJ, 2722 0, 2723 0, 2724 /*139*/MatSetBlockSizes_MPIAIJ, 2725 0, 2726 0, 2727 MatFDColoringSetUp_MPIXAIJ, 2728 MatFindOffBlockDiagonalEntries_MPIAIJ, 2729 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2730 }; 2731 2732 /* ----------------------------------------------------------------------------------------*/ 2733 2734 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2735 { 2736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2737 PetscErrorCode ierr; 2738 2739 PetscFunctionBegin; 2740 ierr = 
MatStoreValues(aij->A);CHKERRQ(ierr); 2741 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2742 PetscFunctionReturn(0); 2743 } 2744 2745 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2746 { 2747 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2748 PetscErrorCode ierr; 2749 2750 PetscFunctionBegin; 2751 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2752 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2753 PetscFunctionReturn(0); 2754 } 2755 2756 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2757 { 2758 Mat_MPIAIJ *b; 2759 PetscErrorCode ierr; 2760 PetscMPIInt size; 2761 2762 PetscFunctionBegin; 2763 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2764 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2765 b = (Mat_MPIAIJ*)B->data; 2766 2767 #if defined(PETSC_USE_CTABLE) 2768 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2769 #else 2770 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2771 #endif 2772 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2773 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2774 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2775 2776 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2777 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2778 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2779 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2780 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2781 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2782 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2783 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2784 2785 if (!B->preallocated) { 2786 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2787 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2788 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2789 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2790 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2791 } 2792 2793 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2794 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2795 B->preallocated = PETSC_TRUE; 2796 B->was_assembled = PETSC_FALSE; 2797 B->assembled = PETSC_FALSE; 2798 PetscFunctionReturn(0); 2799 } 2800 2801 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2802 { 2803 Mat_MPIAIJ *b; 2804 PetscErrorCode ierr; 2805 2806 PetscFunctionBegin; 2807 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2808 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2809 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2810 b = (Mat_MPIAIJ*)B->data; 2811 2812 #if defined(PETSC_USE_CTABLE) 2813 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2814 #else 2815 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2816 #endif 2817 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2818 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2819 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2820 2821 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2822 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2823 B->preallocated = PETSC_TRUE; 2824 B->was_assembled = PETSC_FALSE; 2825 B->assembled = PETSC_FALSE; 2826 PetscFunctionReturn(0); 2827 } 2828 2829 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2830 { 2831 Mat mat; 2832 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2833 PetscErrorCode 
ierr; 2834 2835 PetscFunctionBegin; 2836 *newmat = 0; 2837 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2838 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2839 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2840 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2841 a = (Mat_MPIAIJ*)mat->data; 2842 2843 mat->factortype = matin->factortype; 2844 mat->assembled = PETSC_TRUE; 2845 mat->insertmode = NOT_SET_VALUES; 2846 mat->preallocated = PETSC_TRUE; 2847 2848 a->size = oldmat->size; 2849 a->rank = oldmat->rank; 2850 a->donotstash = oldmat->donotstash; 2851 a->roworiented = oldmat->roworiented; 2852 a->rowindices = 0; 2853 a->rowvalues = 0; 2854 a->getrowactive = PETSC_FALSE; 2855 2856 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2857 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2858 2859 if (oldmat->colmap) { 2860 #if defined(PETSC_USE_CTABLE) 2861 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2862 #else 2863 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2864 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2865 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2866 #endif 2867 } else a->colmap = 0; 2868 if (oldmat->garray) { 2869 PetscInt len; 2870 len = oldmat->B->cmap->n; 2871 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2872 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2873 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2874 } else a->garray = 0; 2875 2876 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2877 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2878 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2879 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2880 2881 if (oldmat->Mvctx_mpi1) { 2882 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2883 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2884 } 2885 2886 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2887 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2888 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2889 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2890 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2891 *newmat = mat; 2892 PetscFunctionReturn(0); 2893 } 2894 2895 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2896 { 2897 PetscBool isbinary, ishdf5; 2898 PetscErrorCode ierr; 2899 2900 PetscFunctionBegin; 2901 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2902 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2903 /* force binary viewer to load .info file if it has not yet done so */ 2904 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2905 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2906 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2907 if (isbinary) { 2908 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2909 } else if (ishdf5) { 2910 #if defined(PETSC_HAVE_HDF5) 2911 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2912 #else 2913 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2914 #endif 2915 } else { 2916 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2917 } 2918 PetscFunctionReturn(0); 2919 } 2920 2921 
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2922 { 2923 PetscScalar *vals,*svals; 2924 MPI_Comm comm; 2925 PetscErrorCode ierr; 2926 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2927 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2928 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2929 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2930 PetscInt cend,cstart,n,*rowners; 2931 int fd; 2932 PetscInt bs = newMat->rmap->bs; 2933 2934 PetscFunctionBegin; 2935 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2936 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2937 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2938 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2939 if (!rank) { 2940 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2941 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2942 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2943 } 2944 2945 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2946 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2947 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2948 if (bs < 0) bs = 1; 2949 2950 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2951 M = header[1]; N = header[2]; 2952 2953 /* If global sizes are set, check if they are consistent with that given in the file */ 2954 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2955 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file 
has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2956 2957 /* determine ownership of all (block) rows */ 2958 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2959 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2960 else m = newMat->rmap->n; /* Set by user */ 2961 2962 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2963 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2964 2965 /* First process needs enough room for process with most rows */ 2966 if (!rank) { 2967 mmax = rowners[1]; 2968 for (i=2; i<=size; i++) { 2969 mmax = PetscMax(mmax, rowners[i]); 2970 } 2971 } else mmax = -1; /* unused, but compilers complain */ 2972 2973 rowners[0] = 0; 2974 for (i=2; i<=size; i++) { 2975 rowners[i] += rowners[i-1]; 2976 } 2977 rstart = rowners[rank]; 2978 rend = rowners[rank+1]; 2979 2980 /* distribute row lengths to all processors */ 2981 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2982 if (!rank) { 2983 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2984 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2985 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2986 for (j=0; j<m; j++) { 2987 procsnz[0] += ourlens[j]; 2988 } 2989 for (i=1; i<size; i++) { 2990 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2991 /* calculate the number of nonzeros on each processor */ 2992 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2993 procsnz[i] += rowlengths[j]; 2994 } 2995 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2996 } 2997 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2998 } else { 2999 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3000 } 3001 3002 if (!rank) { 3003 /* determine max buffer needed and allocate it */ 3004 maxnz = 0; 3005 for (i=0; i<size; i++) { 3006 maxnz = 
PetscMax(maxnz,procsnz[i]); 3007 } 3008 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3009 3010 /* read in my part of the matrix column indices */ 3011 nz = procsnz[0]; 3012 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3013 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3014 3015 /* read in every one elses and ship off */ 3016 for (i=1; i<size; i++) { 3017 nz = procsnz[i]; 3018 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3019 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3020 } 3021 ierr = PetscFree(cols);CHKERRQ(ierr); 3022 } else { 3023 /* determine buffer space needed for message */ 3024 nz = 0; 3025 for (i=0; i<m; i++) { 3026 nz += ourlens[i]; 3027 } 3028 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3029 3030 /* receive message of column indices*/ 3031 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3032 } 3033 3034 /* determine column ownership if matrix is not square */ 3035 if (N != M) { 3036 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3037 else n = newMat->cmap->n; 3038 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3039 cstart = cend - n; 3040 } else { 3041 cstart = rstart; 3042 cend = rend; 3043 n = cend - cstart; 3044 } 3045 3046 /* loop over local rows, determining number of off diagonal entries */ 3047 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3048 jj = 0; 3049 for (i=0; i<m; i++) { 3050 for (j=0; j<ourlens[i]; j++) { 3051 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3052 jj++; 3053 } 3054 } 3055 3056 for (i=0; i<m; i++) { 3057 ourlens[i] -= offlens[i]; 3058 } 3059 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3060 3061 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3062 3063 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3064 3065 for (i=0; i<m; i++) { 3066 ourlens[i] += offlens[i]; 3067 } 3068 3069 if (!rank) { 3070 ierr = 
PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3071 3072 /* read in my part of the matrix numerical values */ 3073 nz = procsnz[0]; 3074 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3075 3076 /* insert into matrix */ 3077 jj = rstart; 3078 smycols = mycols; 3079 svals = vals; 3080 for (i=0; i<m; i++) { 3081 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3082 smycols += ourlens[i]; 3083 svals += ourlens[i]; 3084 jj++; 3085 } 3086 3087 /* read in other processors and ship out */ 3088 for (i=1; i<size; i++) { 3089 nz = procsnz[i]; 3090 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3091 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3092 } 3093 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3094 } else { 3095 /* receive numeric values */ 3096 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3097 3098 /* receive message of values*/ 3099 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3100 3101 /* insert into matrix */ 3102 jj = rstart; 3103 smycols = mycols; 3104 svals = vals; 3105 for (i=0; i<m; i++) { 3106 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3107 smycols += ourlens[i]; 3108 svals += ourlens[i]; 3109 jj++; 3110 } 3111 } 3112 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3113 ierr = PetscFree(vals);CHKERRQ(ierr); 3114 ierr = PetscFree(mycols);CHKERRQ(ierr); 3115 ierr = PetscFree(rowners);CHKERRQ(ierr); 3116 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3117 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3118 PetscFunctionReturn(0); 3119 } 3120 3121 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3122 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3123 { 3124 PetscErrorCode ierr; 3125 IS iscol_local; 3126 PetscBool isstride; 3127 PetscMPIInt lisstride=0,gisstride; 3128 3129 PetscFunctionBegin; 3130 /* check if we are grabbing all columns*/ 3131 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3132 3133 if (isstride) { 3134 PetscInt start,len,mstart,mlen; 3135 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3136 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3137 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3138 if (mstart == start && mlen-mstart == len) lisstride = 1; 3139 } 3140 3141 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3142 if (gisstride) { 3143 PetscInt N; 3144 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3145 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3146 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3147 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3148 } else { 3149 PetscInt cbs; 3150 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3151 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3152 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3153 } 3154 3155 *isseq = iscol_local; 3156 PetscFunctionReturn(0); 3157 } 3158 3159 /* 3160 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3161 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3162 3163 Input Parameters: 3164 mat - matrix 3165 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3166 i.e., mat->rstart <= isrow[i] < mat->rend 3167 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3168 i.e., mat->cstart <= iscol[i] < mat->cend 3169 Output Parameter: 3170 isrow_d,iscol_d - sequential 
row and column index sets for retrieving mat->A 3171 iscol_o - sequential column index set for retrieving mat->B 3172 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3173 */ 3174 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3175 { 3176 PetscErrorCode ierr; 3177 Vec x,cmap; 3178 const PetscInt *is_idx; 3179 PetscScalar *xarray,*cmaparray; 3180 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3181 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3182 Mat B=a->B; 3183 Vec lvec=a->lvec,lcmap; 3184 PetscInt i,cstart,cend,Bn=B->cmap->N; 3185 MPI_Comm comm; 3186 VecScatter Mvctx=a->Mvctx; 3187 3188 PetscFunctionBegin; 3189 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3190 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3191 3192 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3193 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3194 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3195 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3196 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3197 3198 /* Get start indices */ 3199 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3200 isstart -= ncols; 3201 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3202 3203 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3204 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3205 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3206 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3207 for (i=0; i<ncols; i++) { 3208 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3209 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3210 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3211 } 3212 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3213 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3214 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3215 3216 /* Get 
iscol_d */ 3217 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3218 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3219 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3220 3221 /* Get isrow_d */ 3222 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3223 rstart = mat->rmap->rstart; 3224 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3225 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3226 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3227 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3228 3229 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3230 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3231 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3232 3233 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3234 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3235 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3236 3237 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3238 3239 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3240 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3241 3242 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3243 /* off-process column indices */ 3244 count = 0; 3245 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3246 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3247 3248 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3249 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3250 for (i=0; i<Bn; i++) { 3251 if (PetscRealPart(xarray[i]) > -1.0) { 3252 idx[count] = i; /* local column index in off-diagonal part B */ 3253 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3254 count++; 3255 } 3256 } 3257 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3258 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3259 
3260 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3261 /* cannot ensure iscol_o has same blocksize as iscol! */ 3262 3263 ierr = PetscFree(idx);CHKERRQ(ierr); 3264 *garray = cmap1; 3265 3266 ierr = VecDestroy(&x);CHKERRQ(ierr); 3267 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3268 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3269 PetscFunctionReturn(0); 3270 } 3271 3272 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3273 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3274 { 3275 PetscErrorCode ierr; 3276 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3277 Mat M = NULL; 3278 MPI_Comm comm; 3279 IS iscol_d,isrow_d,iscol_o; 3280 Mat Asub = NULL,Bsub = NULL; 3281 PetscInt n; 3282 3283 PetscFunctionBegin; 3284 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3285 3286 if (call == MAT_REUSE_MATRIX) { 3287 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3288 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3289 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3290 3291 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3292 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3293 3294 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3295 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3296 3297 /* Update diagonal and off-diagonal portions of submat */ 3298 asub = (Mat_MPIAIJ*)(*submat)->data; 3299 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3300 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 
3301 if (n) { 3302 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3303 } 3304 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3305 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3306 3307 } else { /* call == MAT_INITIAL_MATRIX) */ 3308 const PetscInt *garray; 3309 PetscInt BsubN; 3310 3311 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3312 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3313 3314 /* Create local submatrices Asub and Bsub */ 3315 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3316 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3317 3318 /* Create submatrix M */ 3319 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3320 3321 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3322 asub = (Mat_MPIAIJ*)M->data; 3323 3324 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3325 n = asub->B->cmap->N; 3326 if (BsubN > n) { 3327 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3328 const PetscInt *idx; 3329 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3330 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3331 3332 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3333 j = 0; 3334 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3335 for (i=0; i<n; i++) { 3336 if (j >= BsubN) break; 3337 while (subgarray[i] > garray[j]) j++; 3338 3339 if (subgarray[i] == garray[j]) { 3340 idx_new[i] = idx[j++]; 3341 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3342 } 3343 ierr = 
ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3344 3345 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3346 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3347 3348 } else if (BsubN < n) { 3349 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3350 } 3351 3352 ierr = PetscFree(garray);CHKERRQ(ierr); 3353 *submat = M; 3354 3355 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3356 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3357 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3358 3359 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3360 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3361 3362 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3363 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3364 } 3365 PetscFunctionReturn(0); 3366 } 3367 3368 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3369 { 3370 PetscErrorCode ierr; 3371 IS iscol_local=NULL,isrow_d; 3372 PetscInt csize; 3373 PetscInt n,i,j,start,end; 3374 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3375 MPI_Comm comm; 3376 3377 PetscFunctionBegin; 3378 /* If isrow has same processor distribution as mat, 3379 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3380 if (call == MAT_REUSE_MATRIX) { 3381 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3382 if (isrow_d) { 3383 sameRowDist = PETSC_TRUE; 3384 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3385 } else { 3386 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3387 if (iscol_local) { 3388 sameRowDist = PETSC_TRUE; 3389 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3390 } 3391 } 3392 } else { 3393 /* Check if 
isrow has same processor distribution as mat */ 3394 sameDist[0] = PETSC_FALSE; 3395 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3396 if (!n) { 3397 sameDist[0] = PETSC_TRUE; 3398 } else { 3399 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3400 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3401 if (i >= start && j < end) { 3402 sameDist[0] = PETSC_TRUE; 3403 } 3404 } 3405 3406 /* Check if iscol has same processor distribution as mat */ 3407 sameDist[1] = PETSC_FALSE; 3408 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3409 if (!n) { 3410 sameDist[1] = PETSC_TRUE; 3411 } else { 3412 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3413 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3414 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3415 } 3416 3417 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3418 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3419 sameRowDist = tsameDist[0]; 3420 } 3421 3422 if (sameRowDist) { 3423 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3424 /* isrow and iscol have same processor distribution as mat */ 3425 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3426 PetscFunctionReturn(0); 3427 } else { /* sameRowDist */ 3428 /* isrow has same processor distribution as mat */ 3429 if (call == MAT_INITIAL_MATRIX) { 3430 PetscBool sorted; 3431 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3432 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3433 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3434 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3435 3436 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3437 if (sorted) { 3438 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3439 ierr = 
MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3440 PetscFunctionReturn(0); 3441 } 3442 } else { /* call == MAT_REUSE_MATRIX */ 3443 IS iscol_sub; 3444 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3445 if (iscol_sub) { 3446 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3447 PetscFunctionReturn(0); 3448 } 3449 } 3450 } 3451 } 3452 3453 /* General case: iscol -> iscol_local which has global size of iscol */ 3454 if (call == MAT_REUSE_MATRIX) { 3455 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3456 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3457 } else { 3458 if (!iscol_local) { 3459 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3460 } 3461 } 3462 3463 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3464 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3465 3466 if (call == MAT_INITIAL_MATRIX) { 3467 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3468 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3469 } 3470 PetscFunctionReturn(0); 3471 } 3472 3473 /*@C 3474 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3475 and "off-diagonal" part of the matrix in CSR format. 3476 3477 Collective on MPI_Comm 3478 3479 Input Parameters: 3480 + comm - MPI communicator 3481 . A - "diagonal" portion of matrix 3482 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3483 - garray - global index of B columns 3484 3485 Output Parameter: 3486 . 
mat - the matrix, with input A as its local diagonal matrix 3487 Level: advanced 3488 3489 Notes: 3490 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3491 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3492 3493 .seealso: MatCreateMPIAIJWithSplitArrays() 3494 @*/ 3495 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3496 { 3497 PetscErrorCode ierr; 3498 Mat_MPIAIJ *maij; 3499 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3500 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3501 PetscScalar *oa=b->a; 3502 Mat Bnew; 3503 PetscInt m,n,N; 3504 3505 PetscFunctionBegin; 3506 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3507 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3508 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3509 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3510 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3511 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3512 3513 /* Get global columns of mat */ 3514 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3515 3516 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3517 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3518 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3519 maij = (Mat_MPIAIJ*)(*mat)->data; 3520 3521 (*mat)->preallocated = PETSC_TRUE; 3522 3523 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3524 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3525 3526 /* Set A as diagonal portion of *mat */ 3527 maij->A = A; 3528 3529 nz = oi[m]; 3530 for (i=0; i<nz; i++) { 3531 col = oj[i]; 3532 oj[i] 
= garray[col]; 3533 } 3534 3535 /* Set Bnew as off-diagonal portion of *mat */ 3536 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3537 bnew = (Mat_SeqAIJ*)Bnew->data; 3538 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3539 maij->B = Bnew; 3540 3541 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3542 3543 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3544 b->free_a = PETSC_FALSE; 3545 b->free_ij = PETSC_FALSE; 3546 ierr = MatDestroy(&B);CHKERRQ(ierr); 3547 3548 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3549 bnew->free_a = PETSC_TRUE; 3550 bnew->free_ij = PETSC_TRUE; 3551 3552 /* condense columns of maij->B */ 3553 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3554 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3555 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3556 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3557 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3558 PetscFunctionReturn(0); 3559 } 3560 3561 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3562 3563 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3564 { 3565 PetscErrorCode ierr; 3566 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3567 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3568 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3569 Mat M,Msub,B=a->B; 3570 MatScalar *aa; 3571 Mat_SeqAIJ *aij; 3572 PetscInt *garray = a->garray,*colsub,Ncols; 3573 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3574 IS iscol_sub,iscmap; 3575 const PetscInt *is_idx,*cmap; 3576 PetscBool allcolumns=PETSC_FALSE; 3577 MPI_Comm comm; 3578 3579 
  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve the objects composed on *newmat by the MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
    if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);

    ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
    if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
    if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    if (allcolumns) {
      iscol_sub = iscol_local;
      ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
      ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
      ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
      count = 0;
      k     = 0;
      /* Keep only requested columns that this process actually holds:
         either in the diagonal block [cstart,cend) or among the off-diagonal
         columns listed in the sorted garray */
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);

      ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
    }

    /* (3) Create sequential Msub */
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
  }

  ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt    rank,size;
    PetscInt       csize;

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);

    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  jj = aij->j;
  aa = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    /* translate Msub's local column indices to newmat's global columns via cmap */
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Check for special case: each processor gets entire matrix columns */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;

  if (call == MAT_REUSE_MATRIX) {
    /* Reuse the sequential submatrix composed on *newmat by a previous MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  /* Copy Mreuse row-by-row into the parallel matrix; cwork/vwork walk the CSR arrays */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  /* Validate the CSR input: nonnegative row lengths, sorted-per-row column indices in range */
  for (i=0; i<m && Ii; i++) {
    nnz = Ii[i+1]- Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row
%D starts with negative column index",i,JJ[0]); 3928 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3929 } 3930 #endif 3931 3932 for (i=0; i<m && Ii; i++) { 3933 nnz = Ii[i+1]- Ii[i]; 3934 JJ = J + Ii[i]; 3935 nnz_max = PetscMax(nnz_max,nnz); 3936 d = 0; 3937 for (j=0; j<nnz; j++) { 3938 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3939 } 3940 d_nnz[i] = d; 3941 o_nnz[i] = nnz - d; 3942 } 3943 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3944 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3945 3946 if (v) values = (PetscScalar*)v; 3947 else { 3948 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3949 } 3950 3951 for (i=0; i<m && Ii; i++) { 3952 ii = i + rstart; 3953 nnz = Ii[i+1]- Ii[i]; 3954 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3955 } 3956 nooffprocentries = B->nooffprocentries; 3957 B->nooffprocentries = PETSC_TRUE; 3958 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3959 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3960 B->nooffprocentries = nooffprocentries; 3961 3962 if (!v) { 3963 ierr = PetscFree(values);CHKERRQ(ierr); 3964 } 3965 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3966 PetscFunctionReturn(0); 3967 } 3968 3969 /*@ 3970 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3971 (the default parallel PETSc format). 3972 3973 Collective on MPI_Comm 3974 3975 Input Parameters: 3976 + B - the matrix 3977 . i - the indices into j for the start of each local row (starts with zero) 3978 . 
j - the column indices for each local row (starts with zero) 3979 - v - optional values in the matrix 3980 3981 Level: developer 3982 3983 Notes: 3984 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3985 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3986 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3987 3988 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3989 3990 The format which is used for the sparse matrix input, is equivalent to a 3991 row-major ordering.. i.e for the following matrix, the input data expected is 3992 as shown 3993 3994 $ 1 0 0 3995 $ 2 0 3 P0 3996 $ ------- 3997 $ 4 5 6 P1 3998 $ 3999 $ Process0 [P0]: rows_owned=[0,1] 4000 $ i = {0,1,3} [size = nrow+1 = 2+1] 4001 $ j = {0,0,2} [size = 3] 4002 $ v = {1,2,3} [size = 3] 4003 $ 4004 $ Process1 [P1]: rows_owned=[2] 4005 $ i = {0,3} [size = nrow+1 = 1+1] 4006 $ j = {0,1,2} [size = 3] 4007 $ v = {4,5,6} [size = 3] 4008 4009 .keywords: matrix, aij, compressed row, sparse, parallel 4010 4011 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4012 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4013 @*/ 4014 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4015 { 4016 PetscErrorCode ierr; 4017 4018 PetscFunctionBegin; 4019 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4020 PetscFunctionReturn(0); 4021 } 4022 4023 /*@C 4024 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4025 (the default parallel PETSc format). 
For good matrix assembly performance 4026 the user should preallocate the matrix storage by setting the parameters 4027 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4028 performance can be increased by more than a factor of 50. 4029 4030 Collective on MPI_Comm 4031 4032 Input Parameters: 4033 + B - the matrix 4034 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4035 (same value is used for all local rows) 4036 . d_nnz - array containing the number of nonzeros in the various rows of the 4037 DIAGONAL portion of the local submatrix (possibly different for each row) 4038 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4039 The size of this array is equal to the number of local rows, i.e 'm'. 4040 For matrices that will be factored, you must leave room for (and set) 4041 the diagonal entry even if it is zero. 4042 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4043 submatrix (same value is used for all local rows). 4044 - o_nnz - array containing the number of nonzeros in the various rows of the 4045 OFF-DIAGONAL portion of the local submatrix (possibly different for 4046 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4047 structure. The size of this array is equal to the number 4048 of local rows, i.e 'm'. 4049 4050 If the *_nnz parameter is given then the *_nz parameter is ignored 4051 4052 The AIJ format (also called the Yale sparse matrix format or 4053 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4054 storage. The stored row and column indices begin with zero. 4055 See Users-Manual: ch_mat for details. 4056 4057 The parallel matrix is partitioned such that the first m0 rows belong to 4058 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4059 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
4146 4147 Level: intermediate 4148 4149 .keywords: matrix, aij, compressed row, sparse, parallel 4150 4151 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4152 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4153 @*/ 4154 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4155 { 4156 PetscErrorCode ierr; 4157 4158 PetscFunctionBegin; 4159 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4160 PetscValidType(B,1); 4161 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4162 PetscFunctionReturn(0); 4163 } 4164 4165 /*@ 4166 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4167 CSR format the local rows. 4168 4169 Collective on MPI_Comm 4170 4171 Input Parameters: 4172 + comm - MPI communicator 4173 . m - number of local rows (Cannot be PETSC_DECIDE) 4174 . n - This value should be the same as the local size used in creating the 4175 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4176 calculated if N is given) For square matrices n is almost always m. 4177 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4178 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4179 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4180 . j - column indices 4181 - a - matrix values 4182 4183 Output Parameter: 4184 . mat - the matrix 4185 4186 Level: intermediate 4187 4188 Notes: 4189 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4190 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4191 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
4192 4193 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4194 4195 The format which is used for the sparse matrix input, is equivalent to a 4196 row-major ordering.. i.e for the following matrix, the input data expected is 4197 as shown 4198 4199 $ 1 0 0 4200 $ 2 0 3 P0 4201 $ ------- 4202 $ 4 5 6 P1 4203 $ 4204 $ Process0 [P0]: rows_owned=[0,1] 4205 $ i = {0,1,3} [size = nrow+1 = 2+1] 4206 $ j = {0,0,2} [size = 3] 4207 $ v = {1,2,3} [size = 3] 4208 $ 4209 $ Process1 [P1]: rows_owned=[2] 4210 $ i = {0,3} [size = nrow+1 = 1+1] 4211 $ j = {0,1,2} [size = 3] 4212 $ v = {4,5,6} [size = 3] 4213 4214 .keywords: matrix, aij, compressed row, sparse, parallel 4215 4216 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4217 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4218 @*/ 4219 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4220 { 4221 PetscErrorCode ierr; 4222 4223 PetscFunctionBegin; 4224 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4225 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4226 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4227 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4228 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4229 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4230 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4231 PetscFunctionReturn(0); 4232 } 4233 4234 /*@C 4235 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4236 (the default parallel PETSc format). For good matrix assembly performance 4237 the user should preallocate the matrix storage by setting the parameters 4238 d_nz (or d_nnz) and o_nz (or o_nnz). 
By setting these parameters accurately, 4239 performance can be increased by more than a factor of 50. 4240 4241 Collective on MPI_Comm 4242 4243 Input Parameters: 4244 + comm - MPI communicator 4245 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4246 This value should be the same as the local size used in creating the 4247 y vector for the matrix-vector product y = Ax. 4248 . n - This value should be the same as the local size used in creating the 4249 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4250 calculated if N is given) For square matrices n is almost always m. 4251 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4252 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4253 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4254 (same value is used for all local rows) 4255 . d_nnz - array containing the number of nonzeros in the various rows of the 4256 DIAGONAL portion of the local submatrix (possibly different for each row) 4257 or NULL, if d_nz is used to specify the nonzero structure. 4258 The size of this array is equal to the number of local rows, i.e 'm'. 4259 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4260 submatrix (same value is used for all local rows). 4261 - o_nnz - array containing the number of nonzeros in the various rows of the 4262 OFF-DIAGONAL portion of the local submatrix (possibly different for 4263 each row) or NULL, if o_nz is used to specify the nonzero 4264 structure. The size of this array is equal to the number 4265 of local rows, i.e 'm'. 4266 4267 Output Parameter: 4268 . A - the matrix 4269 4270 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4271 MatXXXXSetPreallocation() paradgm instead of this routine directly. 
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitute the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4312 4313 When calling this routine with a single process communicator, a matrix of 4314 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4315 type of communicator, use the construction mechanism 4316 .vb 4317 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4318 .ve 4319 4320 $ MatCreate(...,&A); 4321 $ MatSetType(A,MATMPIAIJ); 4322 $ MatSetSizes(A, m,n,M,N); 4323 $ MatMPIAIJSetPreallocation(A,...); 4324 4325 By default, this format uses inodes (identical nodes) when possible. 4326 We search for consecutive rows with the same nonzero structure, thereby 4327 reusing matrix information to achieve increased efficiency. 4328 4329 Options Database Keys: 4330 + -mat_no_inode - Do not use inodes 4331 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4332 4333 4334 4335 Example usage: 4336 4337 Consider the following 8x8 matrix with 34 non-zero values, that is 4338 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4339 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4340 as follows 4341 4342 .vb 4343 1 2 0 | 0 3 0 | 0 4 4344 Proc0 0 5 6 | 7 0 0 | 8 0 4345 9 0 10 | 11 0 0 | 12 0 4346 ------------------------------------- 4347 13 0 14 | 15 16 17 | 0 0 4348 Proc1 0 18 0 | 19 20 21 | 0 0 4349 0 0 0 | 22 23 0 | 24 0 4350 ------------------------------------- 4351 Proc2 25 26 27 | 0 0 28 | 29 0 4352 30 0 0 | 31 32 33 | 0 34 4353 .ve 4354 4355 This can be represented as a collection of submatrices as 4356 4357 .vb 4358 A B C 4359 D E F 4360 G H I 4361 .ve 4362 4363 Where the submatrices A,B,C are owned by proc0, D,E,F are 4364 owned by proc1, G,H,I are owned by proc2. 4365 4366 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4367 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4368 The 'M','N' parameters are 8,8, and have the same values on all procs. 
   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
4403 4404 Level: intermediate 4405 4406 .keywords: matrix, aij, compressed row, sparse, parallel 4407 4408 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4409 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4410 @*/ 4411 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4412 { 4413 PetscErrorCode ierr; 4414 PetscMPIInt size; 4415 4416 PetscFunctionBegin; 4417 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4418 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4419 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4420 if (size > 1) { 4421 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4422 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4423 } else { 4424 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4425 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4426 } 4427 PetscFunctionReturn(0); 4428 } 4429 4430 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4431 { 4432 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4433 PetscBool flg; 4434 PetscErrorCode ierr; 4435 4436 PetscFunctionBegin; 4437 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4438 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4439 if (Ad) *Ad = a->A; 4440 if (Ao) *Ao = a->B; 4441 if (colmap) *colmap = a->garray; 4442 PetscFunctionReturn(0); 4443 } 4444 4445 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4446 { 4447 PetscErrorCode ierr; 4448 PetscInt m,N,i,rstart,nnz,Ii; 4449 PetscInt *indx; 4450 PetscScalar *values; 4451 4452 PetscFunctionBegin; 4453 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4454 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4455 PetscInt *dnz,*onz,sum,bs,cbs; 4456 
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    /* prefix-sum of the local row counts gives this process's first global row */
    ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    /* count per-row diagonal/off-diagonal nonzeros of inmat to preallocate outmat */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    }

    /* MATAIJ resolves to SeqAIJ or MPIAIJ depending on the communicator size,
       so both preallocation calls are made; the inapplicable one is a no-op */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase: copy each local row of inmat into its global position */
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Write each process's local rows of A to its own binary file "<outfile>.<rank>". */
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       m,N,i,rstart,nnz;
  size_t         len;
const PetscInt *indx; 4504 PetscViewer out; 4505 char *name; 4506 Mat B; 4507 const PetscScalar *values; 4508 4509 PetscFunctionBegin; 4510 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4511 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4512 /* Should this be the type of the diagonal block of A? */ 4513 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4514 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4515 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4516 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4517 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4518 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4519 for (i=0; i<m; i++) { 4520 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4521 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4522 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4523 } 4524 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4525 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4526 4527 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4528 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4529 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4530 sprintf(name,"%s.%d",outfile,rank); 4531 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4532 ierr = PetscFree(name);CHKERRQ(ierr); 4533 ierr = MatView(B,out);CHKERRQ(ierr); 4534 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4535 ierr = MatDestroy(&B);CHKERRQ(ierr); 4536 PetscFunctionReturn(0); 4537 } 4538 4539 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4540 { 4541 PetscErrorCode ierr; 4542 Mat_Merge_SeqsToMPI *merge; 4543 PetscContainer container; 4544 4545 PetscFunctionBegin; 4546 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4547 if (container) { 4548 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4549 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    /* detach the (now dangling) container from A */
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): fills mpimat (whose nonzero
   structure and Mat_Merge_SeqsToMPI support data were built by
   MatCreateMPIAIJSumSeqAIJSymbolic()) by summing, per global row, this
   process's seqmat values with the values received from the other processes. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* retrieve the communication pattern computed by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* send the values of the rows owned by [proc], contiguous in aa */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba
       (bj_i is a superset of aj, both sorted, so a single merge pass works) */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): computes the nonzero structure
   of the merged parallel matrix, preallocates it, and attaches the
   communication pattern (Mat_Merge_SeqsToMPI) for reuse by the numeric phase. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request
                      *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(&merge);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      /* every row of seqmat owned by [proc] (per the layout) is shipped there */
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* only rows with at least one nonzero appear in the i-structure message */
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++) {
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list for merging sorted column indices */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  /* flatten the free-space chain into the final compressed column array bj */
  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
                 matrices from each processor
Collective on MPI_Comm 4945 4946 Input Parameters: 4947 + comm - the communicators the parallel matrix will live on 4948 . seqmat - the input sequential matrices 4949 . m - number of local rows (or PETSC_DECIDE) 4950 . n - number of local columns (or PETSC_DECIDE) 4951 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4952 4953 Output Parameter: 4954 . mpimat - the parallel matrix generated 4955 4956 Level: advanced 4957 4958 Notes: 4959 The dimensions of the sequential matrix in each processor MUST be the same. 4960 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4961 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4962 @*/ 4963 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4964 { 4965 PetscErrorCode ierr; 4966 PetscMPIInt size; 4967 4968 PetscFunctionBegin; 4969 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4970 if (size == 1) { 4971 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4972 if (scall == MAT_INITIAL_MATRIX) { 4973 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4974 } else { 4975 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4976 } 4977 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4978 PetscFunctionReturn(0); 4979 } 4980 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4981 if (scall == MAT_INITIAL_MATRIX) { 4982 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4983 } 4984 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4985 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4986 PetscFunctionReturn(0); 4987 } 4988 4989 /*@ 4990 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4991 mlocal rows and n columns. 
Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4992 with MatGetSize() 4993 4994 Not Collective 4995 4996 Input Parameters: 4997 + A - the matrix 4998 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4999 5000 Output Parameter: 5001 . A_loc - the local sequential matrix generated 5002 5003 Level: developer 5004 5005 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5006 5007 @*/ 5008 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5009 { 5010 PetscErrorCode ierr; 5011 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5012 Mat_SeqAIJ *mat,*a,*b; 5013 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5014 MatScalar *aa,*ba,*cam; 5015 PetscScalar *ca; 5016 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5017 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5018 PetscBool match; 5019 MPI_Comm comm; 5020 PetscMPIInt size; 5021 5022 PetscFunctionBegin; 5023 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5024 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5025 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5026 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5027 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5028 5029 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5030 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5031 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5032 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5033 aa = a->a; ba = b->a; 5034 if (scall == MAT_INITIAL_MATRIX) { 5035 if (size == 1) { 5036 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5037 PetscFunctionReturn(0); 5038 } 5039 5040 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5041 ci[0] = 0; 5042 for (i=0; i<am; i++) { 5043 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5044 } 5045 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5046 
ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5047 k = 0; 5048 for (i=0; i<am; i++) { 5049 ncols_o = bi[i+1] - bi[i]; 5050 ncols_d = ai[i+1] - ai[i]; 5051 /* off-diagonal portion of A */ 5052 for (jo=0; jo<ncols_o; jo++) { 5053 col = cmap[*bj]; 5054 if (col >= cstart) break; 5055 cj[k] = col; bj++; 5056 ca[k++] = *ba++; 5057 } 5058 /* diagonal portion of A */ 5059 for (j=0; j<ncols_d; j++) { 5060 cj[k] = cstart + *aj++; 5061 ca[k++] = *aa++; 5062 } 5063 /* off-diagonal portion of A */ 5064 for (j=jo; j<ncols_o; j++) { 5065 cj[k] = cmap[*bj++]; 5066 ca[k++] = *ba++; 5067 } 5068 } 5069 /* put together the new matrix */ 5070 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5071 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5072 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5073 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5074 mat->free_a = PETSC_TRUE; 5075 mat->free_ij = PETSC_TRUE; 5076 mat->nonew = 0; 5077 } else if (scall == MAT_REUSE_MATRIX) { 5078 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5079 ci = mat->i; cj = mat->j; cam = mat->a; 5080 for (i=0; i<am; i++) { 5081 /* off-diagonal portion of A */ 5082 ncols_o = bi[i+1] - bi[i]; 5083 for (jo=0; jo<ncols_o; jo++) { 5084 col = cmap[*bj]; 5085 if (col >= cstart) break; 5086 *cam++ = *ba++; bj++; 5087 } 5088 /* diagonal portion of A */ 5089 ncols_d = ai[i+1] - ai[i]; 5090 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5091 /* off-diagonal portion of A */ 5092 for (j=jo; j<ncols_o; j++) { 5093 *cam++ = *ba++; bj++; 5094 } 5095 } 5096 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5097 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5098 PetscFunctionReturn(0); 5099 } 5100 5101 /*@C 5102 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5103 5104 Not Collective 5105 5106 Input Parameters: 
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    /* no row IS supplied: default to all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* no column IS supplied: take the columns with local nonzeros, i.e. the off-diagonal
       columns below cstart, then the local (diagonal-block) columns, then the rest of
       the off-diagonal columns; garray is sorted so this keeps idx[] globally sorted */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    /* PETSC_OWN_POINTER: iscola takes ownership of idx */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices expects an array of matrices on reuse; wrap *A_loc in one */
    ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  if (!col) { /* attach global id of condensed columns */
    ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
  }
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+  rowb, colb - index sets of rows and columns of B to extract
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  /* A*B requires A's column layout to match B's row layout on every process */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* rows of B to fetch = nonzero columns of local A, assembled in sorted global order:
       off-diagonal columns below cstart, local columns, off-diagonal columns above */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    /* take every column of B */
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  /* hand the index sets back to the caller (for later reuse) or destroy them */
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

   Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable..
5264 5265 Level: developer 5266 5267 */ 5268 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5269 { 5270 PetscErrorCode ierr; 5271 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5272 Mat_SeqAIJ *b_oth; 5273 VecScatter ctx; 5274 MPI_Comm comm; 5275 const PetscMPIInt *rprocs,*sprocs; 5276 const PetscInt *srow,*rstarts,*sstarts; 5277 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5278 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5279 PetscScalar *b_otha,*bufa,*bufA,*vals; 5280 MPI_Request *rwaits = NULL,*swaits = NULL; 5281 MPI_Status rstatus; 5282 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5283 5284 PetscFunctionBegin; 5285 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5286 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5287 5288 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5289 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5290 } 5291 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5292 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5293 5294 if (size == 1) { 5295 startsj_s = NULL; 5296 bufa_ptr = NULL; 5297 *B_oth = NULL; 5298 PetscFunctionReturn(0); 5299 } 5300 5301 ctx = a->Mvctx; 5302 tag = ((PetscObject)ctx)->tag; 5303 5304 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5305 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5306 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5307 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not 
needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5308 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5309 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5310 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5311 5312 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5313 if (scall == MAT_INITIAL_MATRIX) { 5314 /* i-array */ 5315 /*---------*/ 5316 /* post receives */ 5317 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5318 for (i=0; i<nrecvs; i++) { 5319 rowlen = rvalues + rstarts[i]*rbs; 5320 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5321 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5322 } 5323 5324 /* pack the outgoing message */ 5325 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5326 5327 sstartsj[0] = 0; 5328 rstartsj[0] = 0; 5329 len = 0; /* total length of j or a array to be sent */ 5330 if (nsends) { 5331 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5332 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5333 } 5334 for (i=0; i<nsends; i++) { 5335 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5336 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5337 for (j=0; j<nrows; j++) { 5338 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5339 for (l=0; l<sbs; l++) { 5340 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5341 5342 rowlen[j*sbs+l] = ncols; 5343 5344 len += ncols; 5345 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5346 } 5347 k++; 5348 } 5349 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5350 5351 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5352 } 5353 /* recvs and sends of i-array are completed */ 5354 i = nrecvs; 5355 
while (i--) { 5356 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5357 } 5358 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5359 ierr = PetscFree(svalues);CHKERRQ(ierr); 5360 5361 /* allocate buffers for sending j and a arrays */ 5362 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5363 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5364 5365 /* create i-array of B_oth */ 5366 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5367 5368 b_othi[0] = 0; 5369 len = 0; /* total length of j or a array to be received */ 5370 k = 0; 5371 for (i=0; i<nrecvs; i++) { 5372 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5373 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5374 for (j=0; j<nrows; j++) { 5375 b_othi[k+1] = b_othi[k] + rowlen[j]; 5376 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5377 k++; 5378 } 5379 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5380 } 5381 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5382 5383 /* allocate space for j and a arrrays of B_oth */ 5384 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5385 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5386 5387 /* j-array */ 5388 /*---------*/ 5389 /* post receives of j-array */ 5390 for (i=0; i<nrecvs; i++) { 5391 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5392 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5393 } 5394 5395 /* pack the outgoing message j-array */ 5396 if (nsends) k = sstarts[0]; 5397 for (i=0; i<nsends; i++) { 5398 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5399 bufJ = bufj+sstartsj[i]; 5400 for (j=0; j<nrows; j++) { 5401 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5402 for (ll=0; ll<sbs; ll++) { 5403 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5404 for (l=0; l<ncols; l++) { 5405 *bufJ++ = cols[l]; 5406 } 
5407 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5408 } 5409 } 5410 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5411 } 5412 5413 /* recvs and sends of j-array are completed */ 5414 i = nrecvs; 5415 while (i--) { 5416 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5417 } 5418 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5419 } else if (scall == MAT_REUSE_MATRIX) { 5420 sstartsj = *startsj_s; 5421 rstartsj = *startsj_r; 5422 bufa = *bufa_ptr; 5423 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5424 b_otha = b_oth->a; 5425 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5426 5427 /* a-array */ 5428 /*---------*/ 5429 /* post receives of a-array */ 5430 for (i=0; i<nrecvs; i++) { 5431 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5432 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5433 } 5434 5435 /* pack the outgoing message a-array */ 5436 if (nsends) k = sstarts[0]; 5437 for (i=0; i<nsends; i++) { 5438 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5439 bufA = bufa+sstartsj[i]; 5440 for (j=0; j<nrows; j++) { 5441 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5442 for (ll=0; ll<sbs; ll++) { 5443 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5444 for (l=0; l<ncols; l++) { 5445 *bufA++ = vals[l]; 5446 } 5447 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5448 } 5449 } 5450 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5451 } 5452 /* recvs and sends of a-array are completed */ 5453 i = nrecvs; 5454 while (i--) { 5455 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5456 } 5457 if (nsends) {ierr = 
MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5458 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5459 5460 if (scall == MAT_INITIAL_MATRIX) { 5461 /* put together the new matrix */ 5462 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5463 5464 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5465 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5466 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5467 b_oth->free_a = PETSC_TRUE; 5468 b_oth->free_ij = PETSC_TRUE; 5469 b_oth->nonew = 0; 5470 5471 ierr = PetscFree(bufj);CHKERRQ(ierr); 5472 if (!startsj_s || !bufa_ptr) { 5473 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5474 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5475 } else { 5476 *startsj_s = sstartsj; 5477 *startsj_r = rstartsj; 5478 *bufa_ptr = bufa; 5479 } 5480 } 5481 5482 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5483 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5484 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5485 PetscFunctionReturn(0); 5486 } 5487 5488 /*@C 5489 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5490 5491 Not Collective 5492 5493 Input Parameters: 5494 . A - The matrix in mpiaij format 5495 5496 Output Parameter: 5497 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5498 . 
colmap - A map from global column index to local index into lvec 5499 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5500 5501 Level: developer 5502 5503 @*/ 5504 #if defined(PETSC_USE_CTABLE) 5505 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5506 #else 5507 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5508 #endif 5509 { 5510 Mat_MPIAIJ *a; 5511 5512 PetscFunctionBegin; 5513 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5514 PetscValidPointer(lvec, 2); 5515 PetscValidPointer(colmap, 3); 5516 PetscValidPointer(multScatter, 4); 5517 a = (Mat_MPIAIJ*) A->data; 5518 if (lvec) *lvec = a->lvec; 5519 if (colmap) *colmap = a->colmap; 5520 if (multScatter) *multScatter = a->Mvctx; 5521 PetscFunctionReturn(0); 5522 } 5523 5524 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5525 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5526 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5527 #if defined(PETSC_HAVE_MKL_SPARSE) 5528 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5529 #endif 5530 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5531 #if defined(PETSC_HAVE_ELEMENTAL) 5532 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5533 #endif 5534 #if defined(PETSC_HAVE_HYPRE) 5535 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5536 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5537 #endif 5538 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5539 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5540 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5541 

/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                          p
        (              )       (              )         (                  )
      m (      A       )  *  n (       B      )   =   m (         C        )
        (              )       (              )         (                  )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  /* form explicit transposes, multiply in the supported (AIJ*Dense) order, transpose back into C */
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  /* MAT_REUSE_MATRIX: C was preallocated by the symbolic phase below */
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase: creates and preallocates the dense product matrix C = A*B */
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  /* NOTE(review): %d with PetscInt arguments; %D would be correct for 64-bit-index builds -- confirm */
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
/* Driver: symbolic phase on MAT_INITIAL_MATRIX, then always the numeric phase */
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateAIJ()
M*/

/* Type constructor registered for MATMPIAIJ: allocates the per-type data structure and
   registers all type-specific methods/conversions on the object's function table */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  /* register type-specific methods that generic Mat entry points dispatch to by name */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  /* conversions to sibling AIJ formats (availability of some depends on the build configuration) */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.
i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  /* mark preallocated so assembly below does not complain about missing preallocation */
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* wrap the caller's CSR arrays directly (no copy) as the diagonal (A) and off-diagonal (B) blocks */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* all entries are local by construction, so suppress the off-process communication phase */
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  /* the user-provided arrays cannot grow, so any new nonzero location is an error */
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
/* (the Fortran wrapper below returns void, so errors must abort rather than return a code) */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

/* map the C symbol to the Fortran compiler's name-mangling convention */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/* Fortran-callable fast path for MatSetValues on a MATMPIAIJ matrix; all arguments arrive by reference */
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A     = aij->A;
    Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa
= a->a; 5818 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5819 Mat B = aij->B; 5820 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5821 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5822 MatScalar *ba = b->a; 5823 5824 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5825 PetscInt nonew = a->nonew; 5826 MatScalar *ap1,*ap2; 5827 5828 PetscFunctionBegin; 5829 for (i=0; i<m; i++) { 5830 if (im[i] < 0) continue; 5831 #if defined(PETSC_USE_DEBUG) 5832 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5833 #endif 5834 if (im[i] >= rstart && im[i] < rend) { 5835 row = im[i] - rstart; 5836 lastcol1 = -1; 5837 rp1 = aj + ai[row]; 5838 ap1 = aa + ai[row]; 5839 rmax1 = aimax[row]; 5840 nrow1 = ailen[row]; 5841 low1 = 0; 5842 high1 = nrow1; 5843 lastcol2 = -1; 5844 rp2 = bj + bi[row]; 5845 ap2 = ba + bi[row]; 5846 rmax2 = bimax[row]; 5847 nrow2 = bilen[row]; 5848 low2 = 0; 5849 high2 = nrow2; 5850 5851 for (j=0; j<n; j++) { 5852 if (roworiented) value = v[i*n+j]; 5853 else value = v[i+j*m]; 5854 if (in[j] >= cstart && in[j] < cend) { 5855 col = in[j] - cstart; 5856 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5857 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5858 } else if (in[j] < 0) continue; 5859 #if defined(PETSC_USE_DEBUG) 5860 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 5861 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5862 #endif 5863 else { 5864 if (mat->was_assembled) { 5865 if (!aij->colmap) { 5866 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5867 } 5868 #if defined(PETSC_USE_CTABLE) 5869 ierr = 
PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5870 col--; 5871 #else 5872 col = aij->colmap[in[j]] - 1; 5873 #endif 5874 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5875 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5876 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5877 col = in[j]; 5878 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5879 B = aij->B; 5880 b = (Mat_SeqAIJ*)B->data; 5881 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5882 rp2 = bj + bi[row]; 5883 ap2 = ba + bi[row]; 5884 rmax2 = bimax[row]; 5885 nrow2 = bilen[row]; 5886 low2 = 0; 5887 high2 = nrow2; 5888 bm = aij->B->rmap->n; 5889 ba = b->a; 5890 } 5891 } else col = in[j]; 5892 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5893 } 5894 } 5895 } else if (!aij->donotstash) { 5896 if (roworiented) { 5897 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5898 } else { 5899 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5900 } 5901 } 5902 } 5903 } 5904 PetscFunctionReturnVoid(); 5905 } 5906