#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL. The type also
    automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
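/*
   A minimal usage sketch of the recommendation above (illustrative only; the sizes M and N and the
   preallocation counts 5 and 2 are assumed placeholders, not values taken from this file).  Calling
   both preallocation routines lets the same application code work whether the communicator
   contains one process or many:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);           (used when the communicator has one process)
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);    (used when it has more than one)
*/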
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
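/*
   A minimal sketch (assumed driver code, not taken from this file) of how the MatDistribute_MPIAIJ()
   routine defined below might be invoked: gmat is a square MATSEQAIJ matrix assembled on rank 0
   (as in MatLoad_MPIAIJ()), and m is the number of rows each process is to own.

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ... later, after gmat's numerical values change but its nonzero pattern does not ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/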
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
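/*
   An illustration of the mapping built above, with assumed values: if the off-diagonal block B
   references the global columns garray = {3, 7, 12}, then in the array variant colmap[3] = 1,
   colmap[7] = 2 and colmap[12] = 3 while every other entry remains 0, so a zero lookup means
   "column not present" and the local column index is recovered as colmap[global] - 1.  The
   PETSC_USE_CTABLE variant stores the same shifted-by-one pairs (global+1 -> local+1) in a hash
   table instead of a dense array of length mat->cmap->N+1.
*/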
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij    = (Mat_MPIAIJ*)mat->data;
  Mat        A       = aij->A; /* diagonal part of the matrix */
  Mat        B       = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b      = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart  = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen  = a->ilen,*aj = a->j;
  PetscInt   *bilen  = b->ilen,*bj = b->j;
  PetscInt   am      = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}
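/*
   A small worked example of the splitting performed above (column values assumed for
   illustration): if this process owns the global columns [cstart,cend) = [4,8) and a row of the
   input CSR data holds the sorted global columns {1, 5, 6, 9}, then the diagonal part receives
   the local columns {1, 2} (that is, 5-4 and 6-4, so ailen for that row becomes 2), while the
   off-diagonal part keeps the global columns {1, 9} for now (bilen becomes 2); those global
   indices are compacted to local ones later, during assembly, when garray and the colmap are built.
*/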
/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij   = (Mat_MPIAIJ*)mat->data;
  Mat         A      = aij->A; /* diagonal part of the matrix */
  Mat         B      = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b     = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am     = aij->A->rmap->n,j;
  PetscInt    *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       *lrows;
  PetscInt       r, len;
  PetscBool      cong;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if ((diag != 0.0) && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* this process needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);
  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
PetscBool inodes; 1402 1403 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1404 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1405 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1406 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1407 if (!inodes) { 1408 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1409 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1410 } else { 1411 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1412 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1413 } 1414 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1415 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1416 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1418 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1420 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1421 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1422 PetscFunctionReturn(0); 1423 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1424 PetscInt inodecount,inodelimit,*inodes; 1425 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1426 if (inodes) { 1427 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1428 } else { 1429 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1430 } 1431 PetscFunctionReturn(0); 1432 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1433 PetscFunctionReturn(0); 1434 } 1435 } else if (isbinary) { 1436 if (size == 1) { 1437 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1438 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1439 } else { 1440 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1441 } 1442 PetscFunctionReturn(0); 1443 } else if (isdraw) { 1444 PetscDraw draw; 1445 PetscBool isnull; 1446 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1447 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1448 if (isnull) PetscFunctionReturn(0); 1449 } 1450 1451 { 1452 /* assemble the entire matrix onto first processor. 
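       This path serves the parallel ASCII and draw viewers: a temporary MATMPIAIJ matrix A is
       created whose rows all live on rank 0 (every other rank contributes 0 local rows), the
       local diagonal block is copied in with its column indices shifted by cmap->rstart and the
       off-diagonal block with its compressed column indices mapped through garray, and rank 0
       then views the sequential diagonal block of the temporary matrix. Simple, but it gathers
       the whole matrix onto one process and is therefore not memory scalable.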
*/ 1453 Mat A; 1454 Mat_SeqAIJ *Aloc; 1455 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1456 MatScalar *a; 1457 1458 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1459 if (!rank) { 1460 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1461 } else { 1462 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1463 } 1464 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1465 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1466 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1467 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1468 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1469 1470 /* copy over the A part */ 1471 Aloc = (Mat_SeqAIJ*)aij->A->data; 1472 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1473 row = mat->rmap->rstart; 1474 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1475 for (i=0; i<m; i++) { 1476 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1477 row++; 1478 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1479 } 1480 aj = Aloc->j; 1481 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1482 1483 /* copy over the B part */ 1484 Aloc = (Mat_SeqAIJ*)aij->B->data; 1485 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1486 row = mat->rmap->rstart; 1487 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1488 ct = cols; 1489 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1490 for (i=0; i<m; i++) { 1491 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1492 row++; 1493 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1494 } 1495 ierr = PetscFree(ct);CHKERRQ(ierr); 1496 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1497 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1498 /* 1499 Everyone has to call to draw the matrix since the graphics waits are 1500 synchronized across all processors that share the PetscDraw object 1501 */ 1502 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1503 if (!rank) { 1504 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1505 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1506 } 1507 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1508 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1509 ierr = MatDestroy(&A);CHKERRQ(ierr); 1510 } 1511 PetscFunctionReturn(0); 1512 } 1513 1514 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1515 { 1516 PetscErrorCode ierr; 1517 PetscBool iascii,isdraw,issocket,isbinary; 1518 1519 PetscFunctionBegin; 1520 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1521 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1522 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1523 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1524 if (iascii || isdraw || isbinary || issocket) { 1525 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1526 } 1527 PetscFunctionReturn(0); 1528 } 1529 1530 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1531 { 1532 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1533 PetscErrorCode ierr; 1534 Vec bb1 = 0; 1535 PetscBool hasop; 1536 
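  /*
     Only the "local" SOR variants are supported in parallel: each outer iteration scatters the
     current solution into the ghosted work vector lvec, moves the off-diagonal contribution to
     the right-hand side (bb1 = bb - B*x), and then runs the requested forward/backward/symmetric
     sweep of the sequential SOR kernel on the local diagonal block A. SOR_APPLY_UPPER is simply
     forwarded to the diagonal block, SOR_EISENSTAT gets its own branch below, and a true global
     parallel SOR is not supported. A sketch of typical use, assuming the standard PCSOR options,
     is

        -ksp_type richardson -pc_type sor -pc_sor_local_symmetric

     which reaches this routine with the SOR_LOCAL_SYMMETRIC_SWEEP flag set.
  */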
1537 PetscFunctionBegin; 1538 if (flag == SOR_APPLY_UPPER) { 1539 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1544 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1545 } 1546 1547 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1548 if (flag & SOR_ZERO_INITIAL_GUESS) { 1549 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1550 its--; 1551 } 1552 1553 while (its--) { 1554 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1555 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1556 1557 /* update rhs: bb1 = bb - B*x */ 1558 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1559 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1560 1561 /* local sweep */ 1562 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1563 } 1564 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1565 if (flag & SOR_ZERO_INITIAL_GUESS) { 1566 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1567 its--; 1568 } 1569 while (its--) { 1570 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1571 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1572 1573 /* update rhs: bb1 = bb - B*x */ 1574 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1575 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1576 1577 /* local sweep */ 1578 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1579 } 1580 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1581 if (flag & SOR_ZERO_INITIAL_GUESS) { 1582 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1583 its--; 1584 } 1585 while (its--) { 1586 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1587 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1588 1589 /* update rhs: bb1 = bb - B*x */ 1590 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1591 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1592 1593 /* local sweep */ 1594 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1595 } 1596 } else if (flag & SOR_EISENSTAT) { 1597 Vec xx1; 1598 1599 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1600 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1601 1602 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1603 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1604 if (!mat->diag) { 1605 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1606 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1607 } 1608 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1609 if (hasop) { 1610 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1611 } else { 1612 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1613 } 1614 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1615 1616 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1617 1618 /* local sweep */ 1619 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1620 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1621 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1622 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1623 1624 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1625 1626 matin->factorerrortype = mat->A->factorerrortype; 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1631 { 1632 Mat aA,aB,Aperm; 1633 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1634 PetscScalar *aa,*ba; 1635 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1636 PetscSF rowsf,sf; 1637 IS parcolp = NULL; 1638 PetscBool done; 1639 PetscErrorCode ierr; 1640 1641 PetscFunctionBegin; 1642 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1643 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1644 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1645 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1646 1647 /* Invert row permutation to find out where my rows should go */ 1648 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1649 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1650 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1651 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1652 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1653 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1654 1655 /* Invert column permutation to find out where my columns should go */ 1656 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1657 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1658 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1659 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1660 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1661 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1662 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1663 1664 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1665 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1666 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1667 1668 /* Find out where my gcols should go */ 1669 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1670 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1671 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1672 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1673 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1674 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1675 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1676 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1677 1678 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1679 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1680 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1681 for (i=0; i<m; i++) { 1682 PetscInt row = rdest[i],rowner; 1683 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1684 for (j=ai[i]; j<ai[i+1]; j++) { 1685 PetscInt cowner,col = cdest[aj[j]]; 1686 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to 
eliminate this search */ 1687 if (rowner == cowner) dnnz[i]++; 1688 else onnz[i]++; 1689 } 1690 for (j=bi[i]; j<bi[i+1]; j++) { 1691 PetscInt cowner,col = gcdest[bj[j]]; 1692 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1693 if (rowner == cowner) dnnz[i]++; 1694 else onnz[i]++; 1695 } 1696 } 1697 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1698 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1699 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1700 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1701 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1702 1703 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1704 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1705 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1706 for (i=0; i<m; i++) { 1707 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1708 PetscInt j0,rowlen; 1709 rowlen = ai[i+1] - ai[i]; 1710 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1711 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1712 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1713 } 1714 rowlen = bi[i+1] - bi[i]; 1715 for (j0=j=0; j<rowlen; j0=j) { 1716 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1717 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1718 } 1719 } 1720 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1721 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1722 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1723 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1724 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1725 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1726 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1727 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1728 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1729 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1730 *B = Aperm; 1731 PetscFunctionReturn(0); 1732 } 1733 1734 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1735 { 1736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1737 PetscErrorCode ierr; 1738 1739 PetscFunctionBegin; 1740 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1741 if (ghosts) *ghosts = aij->garray; 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1746 { 1747 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1748 Mat A = mat->A,B = mat->B; 1749 PetscErrorCode ierr; 1750 PetscReal isend[5],irecv[5]; 1751 1752 PetscFunctionBegin; 1753 info->block_size = 1.0; 1754 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1755 1756 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1757 isend[3] = info->memory; isend[4] = info->mallocs; 1758 1759 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1760 1761 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1762 isend[3] += info->memory; isend[4] += info->mallocs; 1763 if (flag == MAT_LOCAL) { 1764 info->nz_used = isend[0]; 1765 info->nz_allocated = isend[1]; 1766 info->nz_unneeded = isend[2]; 1767 info->memory = isend[3]; 1768 info->mallocs = 
isend[4]; 1769 } else if (flag == MAT_GLOBAL_MAX) { 1770 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1771 1772 info->nz_used = irecv[0]; 1773 info->nz_allocated = irecv[1]; 1774 info->nz_unneeded = irecv[2]; 1775 info->memory = irecv[3]; 1776 info->mallocs = irecv[4]; 1777 } else if (flag == MAT_GLOBAL_SUM) { 1778 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1779 1780 info->nz_used = irecv[0]; 1781 info->nz_allocated = irecv[1]; 1782 info->nz_unneeded = irecv[2]; 1783 info->memory = irecv[3]; 1784 info->mallocs = irecv[4]; 1785 } 1786 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1787 info->fill_ratio_needed = 0; 1788 info->factor_mallocs = 0; 1789 PetscFunctionReturn(0); 1790 } 1791 1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1793 { 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1795 PetscErrorCode ierr; 1796 1797 PetscFunctionBegin; 1798 switch (op) { 1799 case MAT_NEW_NONZERO_LOCATIONS: 1800 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1801 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1802 case MAT_KEEP_NONZERO_PATTERN: 1803 case MAT_NEW_NONZERO_LOCATION_ERR: 1804 case MAT_USE_INODES: 1805 case MAT_IGNORE_ZERO_ENTRIES: 1806 MatCheckPreallocated(A,1); 1807 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1808 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1809 break; 1810 case MAT_ROW_ORIENTED: 1811 MatCheckPreallocated(A,1); 1812 a->roworiented = flg; 1813 1814 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1815 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1816 break; 1817 case MAT_NEW_DIAGONALS: 1818 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1819 break; 1820 case MAT_IGNORE_OFF_PROC_ENTRIES: 1821 a->donotstash = flg; 1822 break; 1823 case MAT_SPD: 1824 A->spd_set = PETSC_TRUE; 1825 A->spd = flg; 1826 if (flg) { 1827 A->symmetric = PETSC_TRUE; 1828 A->structurally_symmetric = PETSC_TRUE; 1829 A->symmetric_set = PETSC_TRUE; 1830 A->structurally_symmetric_set = PETSC_TRUE; 1831 } 1832 break; 1833 case MAT_SYMMETRIC: 1834 MatCheckPreallocated(A,1); 1835 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1836 break; 1837 case MAT_STRUCTURALLY_SYMMETRIC: 1838 MatCheckPreallocated(A,1); 1839 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1840 break; 1841 case MAT_HERMITIAN: 1842 MatCheckPreallocated(A,1); 1843 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1844 break; 1845 case MAT_SYMMETRY_ETERNAL: 1846 MatCheckPreallocated(A,1); 1847 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1848 break; 1849 case MAT_SUBMAT_SINGLEIS: 1850 A->submat_singleis = flg; 1851 break; 1852 case MAT_STRUCTURE_ONLY: 1853 /* The option is handled directly by MatSetOption() */ 1854 break; 1855 default: 1856 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1857 } 1858 PetscFunctionReturn(0); 1859 } 1860 1861 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1862 { 1863 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1864 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1865 PetscErrorCode ierr; 1866 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1867 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1868 PetscInt *cmap,*idx_p; 1869 1870 PetscFunctionBegin; 1871 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1872 mat->getrowactive = PETSC_TRUE; 1873 1874 if (!mat->rowvalues && 
(idx || v)) { 1875 /* 1876 allocate enough space to hold information from the longest row. 1877 */ 1878 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1879 PetscInt max = 1,tmp; 1880 for (i=0; i<matin->rmap->n; i++) { 1881 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1882 if (max < tmp) max = tmp; 1883 } 1884 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1885 } 1886 1887 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1888 lrow = row - rstart; 1889 1890 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1891 if (!v) {pvA = 0; pvB = 0;} 1892 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1893 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1894 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1895 nztot = nzA + nzB; 1896 1897 cmap = mat->garray; 1898 if (v || idx) { 1899 if (nztot) { 1900 /* Sort by increasing column numbers, assuming A and B already sorted */ 1901 PetscInt imark = -1; 1902 if (v) { 1903 *v = v_p = mat->rowvalues; 1904 for (i=0; i<nzB; i++) { 1905 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1906 else break; 1907 } 1908 imark = i; 1909 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1910 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1911 } 1912 if (idx) { 1913 *idx = idx_p = mat->rowindices; 1914 if (imark > -1) { 1915 for (i=0; i<imark; i++) { 1916 idx_p[i] = cmap[cworkB[i]]; 1917 } 1918 } else { 1919 for (i=0; i<nzB; i++) { 1920 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1921 else break; 1922 } 1923 imark = i; 1924 } 1925 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1926 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1927 } 1928 } else { 1929 if (idx) *idx = 0; 1930 if (v) *v = 0; 1931 } 1932 } 1933 *nz = nztot; 1934 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1935 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1936 PetscFunctionReturn(0); 1937 } 1938 1939 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1940 { 1941 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1942 1943 PetscFunctionBegin; 1944 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1945 aij->getrowactive = PETSC_FALSE; 1946 PetscFunctionReturn(0); 1947 } 1948 1949 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1950 { 1951 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1952 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1953 PetscErrorCode ierr; 1954 PetscInt i,j,cstart = mat->cmap->rstart; 1955 PetscReal sum = 0.0; 1956 MatScalar *v; 1957 1958 PetscFunctionBegin; 1959 if (aij->size == 1) { 1960 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1961 } else { 1962 if (type == NORM_FROBENIUS) { 1963 v = amat->a; 1964 for (i=0; i<amat->nz; i++) { 1965 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1966 } 1967 v = bmat->a; 1968 for (i=0; i<bmat->nz; i++) { 1969 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1970 } 1971 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1972 *norm = PetscSqrtReal(*norm); 1973 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1974 } else if (type == NORM_1) { /* max column norm */ 1975 PetscReal *tmp,*tmp2; 1976 PetscInt *jj,*garray = aij->garray; 1977 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1978 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1979 *norm = 0.0; 1980 v = amat->a; jj = amat->j; 1981 for (j=0; j<amat->nz; j++) { 1982 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1983 } 1984 v = bmat->a; jj = bmat->j; 1985 for (j=0; j<bmat->nz; j++) { 1986 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1987 } 1988 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1989 for (j=0; j<mat->cmap->N; j++) { 1990 if (tmp2[j] > *norm) *norm = tmp2[j]; 1991 } 1992 ierr = PetscFree(tmp);CHKERRQ(ierr); 1993 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1994 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1995 } else if (type == NORM_INFINITY) { /* max row norm */ 1996 PetscReal ntemp = 0.0; 1997 for (j=0; j<aij->A->rmap->n; j++) { 1998 v = amat->a + amat->i[j]; 1999 sum = 0.0; 2000 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2001 sum += PetscAbsScalar(*v); v++; 2002 } 2003 v = bmat->a + bmat->i[j]; 2004 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2005 sum += PetscAbsScalar(*v); v++; 2006 } 2007 if (sum > ntemp) ntemp = sum; 2008 } 2009 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2010 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2011 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2012 } 2013 PetscFunctionReturn(0); 2014 } 2015 2016 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2017 { 2018 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2019 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2020 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol; 2021 PetscErrorCode ierr; 2022 Mat B,A_diag,*B_diag; 2023 MatScalar *array; 2024 2025 PetscFunctionBegin; 2026 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2027 ai = Aloc->i; aj = Aloc->j; 2028 bi = Bloc->i; bj = Bloc->j; 2029 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2030 PetscInt *d_nnz,*g_nnz,*o_nnz; 2031 PetscSFNode *oloc; 2032 PETSC_UNUSED PetscSF sf; 2033 2034 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2035 /* compute d_nnz for preallocation */ 2036 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2037 for (i=0; i<ai[ma]; i++) { 2038 d_nnz[aj[i]]++; 2039 } 2040 /* compute local off-diagonal contributions */ 2041 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2042 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2043 /* map those to global */ 2044 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2045 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2046 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2047 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2048 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2049 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2050 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2051 2052 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2053 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2054 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2055 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2056 ierr = 
MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2057 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2058 } else { 2059 B = *matout; 2060 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2061 } 2062 2063 b = (Mat_MPIAIJ*)B->data; 2064 A_diag = a->A; 2065 B_diag = &b->A; 2066 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2067 A_diag_ncol = A_diag->cmap->N; 2068 B_diag_ilen = sub_B_diag->ilen; 2069 B_diag_i = sub_B_diag->i; 2070 2071 /* Set ilen for diagonal of B */ 2072 for (i=0; i<A_diag_ncol; i++) { 2073 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2074 } 2075 2076 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2077 very quickly (=without using MatSetValues), because all writes are local. */ 2078 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2079 2080 /* copy over the B part */ 2081 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2082 array = Bloc->a; 2083 row = A->rmap->rstart; 2084 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2085 cols_tmp = cols; 2086 for (i=0; i<mb; i++) { 2087 ncol = bi[i+1]-bi[i]; 2088 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2089 row++; 2090 array += ncol; cols_tmp += ncol; 2091 } 2092 ierr = PetscFree(cols);CHKERRQ(ierr); 2093 2094 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2095 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2096 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2097 *matout = B; 2098 } else { 2099 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2100 } 2101 PetscFunctionReturn(0); 2102 } 2103 2104 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2105 { 2106 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2107 Mat a = aij->A,b = aij->B; 2108 PetscErrorCode ierr; 2109 PetscInt s1,s2,s3; 2110 2111 PetscFunctionBegin; 2112 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2113 if (rr) { 2114 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2115 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2116 /* Overlap communication with computation. 
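       The scatter of rr into the ghosted vector lvec is only started here; the left scaling of
       the off-diagonal block and the scaling of the diagonal block below proceed while the
       messages are in flight, and the scatter is completed just before the off-diagonal block
       is right-scaled with the received ghost values.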
*/ 2117 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2118 } 2119 if (ll) { 2120 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2121 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2122 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2123 } 2124 /* scale the diagonal block */ 2125 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2126 2127 if (rr) { 2128 /* Do a scatter end and then right scale the off-diagonal block */ 2129 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2130 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2131 } 2132 PetscFunctionReturn(0); 2133 } 2134 2135 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2136 { 2137 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2138 PetscErrorCode ierr; 2139 2140 PetscFunctionBegin; 2141 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2146 { 2147 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2148 Mat a,b,c,d; 2149 PetscBool flg; 2150 PetscErrorCode ierr; 2151 2152 PetscFunctionBegin; 2153 a = matA->A; b = matA->B; 2154 c = matB->A; d = matB->B; 2155 2156 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2157 if (flg) { 2158 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2159 } 2160 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2161 PetscFunctionReturn(0); 2162 } 2163 2164 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2165 { 2166 PetscErrorCode ierr; 2167 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2168 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2169 2170 PetscFunctionBegin; 2171 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2172 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2173 /* because of the column compression in the off-processor part of the matrix a->B, 2174 the number of columns in a->B and b->B may be different, hence we cannot call 2175 the MatCopy() directly on the two parts. If need be, we can provide a more 2176 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2177 then copying the submatrices */ 2178 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2179 } else { 2180 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2181 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2182 } 2183 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2184 PetscFunctionReturn(0); 2185 } 2186 2187 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2188 { 2189 PetscErrorCode ierr; 2190 2191 PetscFunctionBegin; 2192 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2193 PetscFunctionReturn(0); 2194 } 2195 2196 /* 2197 Computes the number of nonzeros per row needed for preallocation when X and Y 2198 have different nonzero structure. 
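   For each row the two sorted column lists are merged in global numbering (via xltog and yltog)
   and columns present in both are counted only once, so nnz[i] = |cols(X,i) U cols(Y,i)|.
   A small worked example: if row i of X has global columns {0,3,7} and row i of Y has {3,5},
   the union is {0,3,5,7} and nnz[i] = 4.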
2199 */ 2200 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2201 { 2202 PetscInt i,j,k,nzx,nzy; 2203 2204 PetscFunctionBegin; 2205 /* Set the number of nonzeros in the new matrix */ 2206 for (i=0; i<m; i++) { 2207 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2208 nzx = xi[i+1] - xi[i]; 2209 nzy = yi[i+1] - yi[i]; 2210 nnz[i] = 0; 2211 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2212 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2213 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2214 nnz[i]++; 2215 } 2216 for (; k<nzy; k++) nnz[i]++; 2217 } 2218 PetscFunctionReturn(0); 2219 } 2220 2221 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2222 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2223 { 2224 PetscErrorCode ierr; 2225 PetscInt m = Y->rmap->N; 2226 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2227 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2228 2229 PetscFunctionBegin; 2230 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2231 PetscFunctionReturn(0); 2232 } 2233 2234 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2235 { 2236 PetscErrorCode ierr; 2237 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2238 PetscBLASInt bnz,one=1; 2239 Mat_SeqAIJ *x,*y; 2240 2241 PetscFunctionBegin; 2242 if (str == SAME_NONZERO_PATTERN) { 2243 PetscScalar alpha = a; 2244 x = (Mat_SeqAIJ*)xx->A->data; 2245 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2246 y = (Mat_SeqAIJ*)yy->A->data; 2247 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2248 x = (Mat_SeqAIJ*)xx->B->data; 2249 y = (Mat_SeqAIJ*)yy->B->data; 2250 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2251 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2252 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2253 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2254 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2255 } else { 2256 Mat B; 2257 PetscInt *nnz_d,*nnz_o; 2258 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2259 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2260 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2261 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2262 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2263 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2264 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2265 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2266 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2267 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2268 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2269 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2270 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2271 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2272 } 2273 PetscFunctionReturn(0); 2274 } 2275 2276 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2277 2278 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2279 { 2280 #if defined(PETSC_USE_COMPLEX) 2281 PetscErrorCode ierr; 2282 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2283 2284 PetscFunctionBegin; 2285 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2286 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2287 #else 2288 PetscFunctionBegin; 2289 #endif 2290 PetscFunctionReturn(0); 2291 } 2292 2293 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2294 { 2295 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2296 PetscErrorCode ierr; 2297 2298 PetscFunctionBegin; 2299 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2300 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2305 { 2306 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2307 PetscErrorCode ierr; 2308 2309 PetscFunctionBegin; 2310 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2311 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2312 PetscFunctionReturn(0); 2313 } 2314 2315 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2316 { 2317 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2318 PetscErrorCode ierr; 2319 PetscInt i,*idxb = 0; 2320 PetscScalar *va,*vb; 2321 Vec vtmp; 2322 2323 PetscFunctionBegin; 2324 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2325 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2326 if (idx) { 2327 for (i=0; i<A->rmap->n; i++) { 2328 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2329 } 2330 } 2331 2332 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2333 if (idx) { 2334 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2335 } 2336 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2337 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2338 2339 for (i=0; i<A->rmap->n; i++) { 2340 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2341 va[i] = vb[i]; 2342 if (idx) idx[i] = a->garray[idxb[i]]; 2343 } 2344 } 2345 2346 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2347 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2348 ierr = PetscFree(idxb);CHKERRQ(ierr); 2349 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2350 PetscFunctionReturn(0); 2351 } 2352 2353 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2354 { 2355 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2356 PetscErrorCode ierr; 2357 PetscInt i,*idxb = 0; 2358 PetscScalar *va,*vb; 2359 Vec vtmp; 2360 2361 PetscFunctionBegin; 2362 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2363 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2364 if (idx) { 2365 for (i=0; i<A->cmap->n; i++) { 2366 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2367 } 2368 } 2369 2370 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2371 if (idx) { 2372 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2373 } 2374 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2375 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2376 2377 for (i=0; i<A->rmap->n; i++) { 2378 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2379 va[i] = vb[i]; 2380 if (idx) idx[i] = a->garray[idxb[i]]; 2381 } 2382 } 2383 2384 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2385 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2386 ierr = PetscFree(idxb);CHKERRQ(ierr); 2387 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2388 PetscFunctionReturn(0); 2389 } 2390 2391 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2392 { 2393 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2394 PetscInt n = A->rmap->n; 2395 PetscInt cstart = A->cmap->rstart; 2396 PetscInt *cmap = mat->garray; 2397 PetscInt *diagIdx, *offdiagIdx; 2398 Vec diagV, offdiagV; 2399 PetscScalar *a, *diagA, *offdiagA; 2400 PetscInt r; 2401 PetscErrorCode ierr; 2402 2403 PetscFunctionBegin; 2404 
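  /*
     Combine the per-block results: MatGetRowMin() is applied separately to the local diagonal
     block A and the off-diagonal block B, and for each row the entry is taken from the block
     whose candidate has the smaller magnitude; the winning column index is shifted by
     cmap->rstart for A or translated through garray for B.
  */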
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2405 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2406 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2407 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2408 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2409 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2410 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2411 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2412 for (r = 0; r < n; ++r) { 2413 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2414 a[r] = diagA[r]; 2415 idx[r] = cstart + diagIdx[r]; 2416 } else { 2417 a[r] = offdiagA[r]; 2418 idx[r] = cmap[offdiagIdx[r]]; 2419 } 2420 } 2421 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2422 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2423 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2424 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2425 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2426 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2431 { 2432 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2433 PetscInt n = A->rmap->n; 2434 PetscInt cstart = A->cmap->rstart; 2435 PetscInt *cmap = mat->garray; 2436 PetscInt *diagIdx, *offdiagIdx; 2437 Vec diagV, offdiagV; 2438 PetscScalar *a, *diagA, *offdiagA; 2439 PetscInt r; 2440 PetscErrorCode ierr; 2441 2442 PetscFunctionBegin; 2443 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2444 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2445 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2446 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2447 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2448 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2449 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2450 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2451 for (r = 0; r < n; ++r) { 2452 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2453 a[r] = diagA[r]; 2454 idx[r] = cstart + diagIdx[r]; 2455 } else { 2456 a[r] = offdiagA[r]; 2457 idx[r] = cmap[offdiagIdx[r]]; 2458 } 2459 } 2460 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2461 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2462 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2463 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2464 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2465 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2466 PetscFunctionReturn(0); 2467 } 2468 2469 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2470 { 2471 PetscErrorCode ierr; 2472 Mat *dummy; 2473 2474 PetscFunctionBegin; 2475 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2476 *newmat = *dummy; 2477 ierr = PetscFree(dummy);CHKERRQ(ierr); 2478 PetscFunctionReturn(0); 2479 } 2480 2481 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2482 { 2483 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2484 PetscErrorCode ierr; 2485 2486 PetscFunctionBegin; 2487 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2488 A->factorerrortype = a->A->factorerrortype; 2489 PetscFunctionReturn(0); 2490 } 2491 2492 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2493 { 2494 PetscErrorCode ierr; 2495 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2496 2497 PetscFunctionBegin; 2498 
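  /*
     Fill the existing nonzero patterns of the local diagonal and off-diagonal blocks with values
     from rctx, then re-run assembly so the parallel matrix is left in a consistent assembled
     state. A minimal usage sketch, assuming the matrix has already been preallocated with its
     nonzero pattern:

        PetscRandom r;
        PetscRandomCreate(PETSC_COMM_WORLD,&r);
        MatSetRandom(mat,r);   /* dispatches here for MATMPIAIJ */
        PetscRandomDestroy(&r);
  */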
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2499 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2500 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2501 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2502 PetscFunctionReturn(0); 2503 } 2504 2505 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2506 { 2507 PetscFunctionBegin; 2508 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2509 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2510 PetscFunctionReturn(0); 2511 } 2512 2513 /*@ 2514 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2515 2516 Collective on Mat 2517 2518 Input Parameters: 2519 + A - the matrix 2520 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2521 2522 Level: advanced 2523 2524 @*/ 2525 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2526 { 2527 PetscErrorCode ierr; 2528 2529 PetscFunctionBegin; 2530 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2535 { 2536 PetscErrorCode ierr; 2537 PetscBool sc = PETSC_FALSE,flg; 2538 2539 PetscFunctionBegin; 2540 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2541 ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr); 2542 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2543 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2544 if (flg) { 2545 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2546 } 2547 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2548 PetscFunctionReturn(0); 2549 } 2550 2551 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2552 { 2553 PetscErrorCode ierr; 2554 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2555 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2556 2557 PetscFunctionBegin; 2558 if (!Y->preallocated) { 2559 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2560 } else if (!aij->nz) { 2561 PetscInt nonew = aij->nonew; 2562 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2563 aij->nonew = nonew; 2564 } 2565 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2566 PetscFunctionReturn(0); 2567 } 2568 2569 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2570 { 2571 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2572 PetscErrorCode ierr; 2573 2574 PetscFunctionBegin; 2575 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2576 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2577 if (d) { 2578 PetscInt rstart; 2579 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2580 *d += rstart; 2581 2582 } 2583 PetscFunctionReturn(0); 2584 } 2585 2586 2587 /* -------------------------------------------------------------------*/ 2588 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2589 MatGetRow_MPIAIJ, 2590 MatRestoreRow_MPIAIJ, 2591 MatMult_MPIAIJ, 2592 /* 4*/ MatMultAdd_MPIAIJ, 2593 MatMultTranspose_MPIAIJ, 2594 MatMultTransposeAdd_MPIAIJ, 2595 0, 2596 0, 2597 0, 2598 /*10*/ 0, 2599 0, 2600 0, 2601 MatSOR_MPIAIJ, 2602 MatTranspose_MPIAIJ, 2603 /*15*/ MatGetInfo_MPIAIJ, 2604 MatEqual_MPIAIJ, 2605 
MatGetDiagonal_MPIAIJ, 2606 MatDiagonalScale_MPIAIJ, 2607 MatNorm_MPIAIJ, 2608 /*20*/ MatAssemblyBegin_MPIAIJ, 2609 MatAssemblyEnd_MPIAIJ, 2610 MatSetOption_MPIAIJ, 2611 MatZeroEntries_MPIAIJ, 2612 /*24*/ MatZeroRows_MPIAIJ, 2613 0, 2614 0, 2615 0, 2616 0, 2617 /*29*/ MatSetUp_MPIAIJ, 2618 0, 2619 0, 2620 MatGetDiagonalBlock_MPIAIJ, 2621 0, 2622 /*34*/ MatDuplicate_MPIAIJ, 2623 0, 2624 0, 2625 0, 2626 0, 2627 /*39*/ MatAXPY_MPIAIJ, 2628 MatCreateSubMatrices_MPIAIJ, 2629 MatIncreaseOverlap_MPIAIJ, 2630 MatGetValues_MPIAIJ, 2631 MatCopy_MPIAIJ, 2632 /*44*/ MatGetRowMax_MPIAIJ, 2633 MatScale_MPIAIJ, 2634 MatShift_MPIAIJ, 2635 MatDiagonalSet_MPIAIJ, 2636 MatZeroRowsColumns_MPIAIJ, 2637 /*49*/ MatSetRandom_MPIAIJ, 2638 0, 2639 0, 2640 0, 2641 0, 2642 /*54*/ MatFDColoringCreate_MPIXAIJ, 2643 0, 2644 MatSetUnfactored_MPIAIJ, 2645 MatPermute_MPIAIJ, 2646 0, 2647 /*59*/ MatCreateSubMatrix_MPIAIJ, 2648 MatDestroy_MPIAIJ, 2649 MatView_MPIAIJ, 2650 0, 2651 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2652 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2653 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2654 0, 2655 0, 2656 0, 2657 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2658 MatGetRowMinAbs_MPIAIJ, 2659 0, 2660 0, 2661 0, 2662 0, 2663 /*75*/ MatFDColoringApply_AIJ, 2664 MatSetFromOptions_MPIAIJ, 2665 0, 2666 0, 2667 MatFindZeroDiagonals_MPIAIJ, 2668 /*80*/ 0, 2669 0, 2670 0, 2671 /*83*/ MatLoad_MPIAIJ, 2672 MatIsSymmetric_MPIAIJ, 2673 0, 2674 0, 2675 0, 2676 0, 2677 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2678 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2679 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2680 MatPtAP_MPIAIJ_MPIAIJ, 2681 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2682 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2683 0, 2684 0, 2685 0, 2686 0, 2687 /*99*/ 0, 2688 0, 2689 0, 2690 MatConjugate_MPIAIJ, 2691 0, 2692 /*104*/MatSetValuesRow_MPIAIJ, 2693 MatRealPart_MPIAIJ, 2694 MatImaginaryPart_MPIAIJ, 2695 0, 2696 0, 2697 /*109*/0, 2698 0, 2699 MatGetRowMin_MPIAIJ, 2700 0, 2701 MatMissingDiagonal_MPIAIJ, 2702 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2703 0, 2704 MatGetGhosts_MPIAIJ, 2705 0, 2706 0, 2707 /*119*/0, 2708 0, 2709 0, 2710 0, 2711 MatGetMultiProcBlock_MPIAIJ, 2712 /*124*/MatFindNonzeroRows_MPIAIJ, 2713 MatGetColumnNorms_MPIAIJ, 2714 MatInvertBlockDiagonal_MPIAIJ, 2715 0, 2716 MatCreateSubMatricesMPI_MPIAIJ, 2717 /*129*/0, 2718 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2719 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2720 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2721 0, 2722 /*134*/0, 2723 0, 2724 MatRARt_MPIAIJ_MPIAIJ, 2725 0, 2726 0, 2727 /*139*/MatSetBlockSizes_MPIAIJ, 2728 0, 2729 0, 2730 MatFDColoringSetUp_MPIXAIJ, 2731 MatFindOffBlockDiagonalEntries_MPIAIJ, 2732 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2733 }; 2734 2735 /* ----------------------------------------------------------------------------------------*/ 2736 2737 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2738 { 2739 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2740 PetscErrorCode ierr; 2741 2742 PetscFunctionBegin; 2743 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2744 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2745 PetscFunctionReturn(0); 2746 } 2747 2748 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2749 { 2750 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2751 PetscErrorCode ierr; 2752 2753 PetscFunctionBegin; 2754 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2755 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2756 PetscFunctionReturn(0); 2757 } 2758 2759 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt 
o_nnz[]) 2760 { 2761 Mat_MPIAIJ *b; 2762 PetscErrorCode ierr; 2763 2764 PetscFunctionBegin; 2765 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2766 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2767 b = (Mat_MPIAIJ*)B->data; 2768 2769 #if defined(PETSC_USE_CTABLE) 2770 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2771 #else 2772 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2773 #endif 2774 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2775 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2776 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2777 2778 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2779 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2780 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2781 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2782 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2783 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2784 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2785 2786 if (!B->preallocated) { 2787 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2788 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2789 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2790 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2791 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2792 } 2793 2794 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2795 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2796 B->preallocated = PETSC_TRUE; 2797 B->was_assembled = PETSC_FALSE; 2798 B->assembled = PETSC_FALSE;; 2799 PetscFunctionReturn(0); 2800 } 2801 2802 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2803 { 2804 Mat_MPIAIJ *b; 2805 PetscErrorCode ierr; 2806 2807 PetscFunctionBegin; 2808 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2809 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2810 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2811 b = (Mat_MPIAIJ*)B->data; 2812 2813 #if defined(PETSC_USE_CTABLE) 2814 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2815 #else 2816 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2817 #endif 2818 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2819 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2820 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2821 2822 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2823 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2824 B->preallocated = PETSC_TRUE; 2825 B->was_assembled = PETSC_FALSE; 2826 B->assembled = PETSC_FALSE; 2827 PetscFunctionReturn(0); 2828 } 2829 2830 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2831 { 2832 Mat mat; 2833 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2834 PetscErrorCode ierr; 2835 2836 PetscFunctionBegin; 2837 *newmat = 0; 2838 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2839 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2840 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2841 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2842 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2843 a = (Mat_MPIAIJ*)mat->data; 2844 2845 mat->factortype = matin->factortype; 2846 mat->assembled = PETSC_TRUE; 2847 mat->insertmode = NOT_SET_VALUES; 2848 mat->preallocated = PETSC_TRUE; 2849 2850 a->size = oldmat->size; 2851 a->rank = oldmat->rank; 2852 a->donotstash 
= oldmat->donotstash; 2853 a->roworiented = oldmat->roworiented; 2854 a->rowindices = 0; 2855 a->rowvalues = 0; 2856 a->getrowactive = PETSC_FALSE; 2857 2858 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2859 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2860 2861 if (oldmat->colmap) { 2862 #if defined(PETSC_USE_CTABLE) 2863 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2864 #else 2865 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2866 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2867 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2868 #endif 2869 } else a->colmap = 0; 2870 if (oldmat->garray) { 2871 PetscInt len; 2872 len = oldmat->B->cmap->n; 2873 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2874 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2875 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2876 } else a->garray = 0; 2877 2878 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2879 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2880 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2881 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2882 2883 if (oldmat->Mvctx_mpi1) { 2884 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2885 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2886 } 2887 2888 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2889 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2890 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2891 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2892 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2893 *newmat = mat; 2894 PetscFunctionReturn(0); 2895 } 2896 2897 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2898 { 2899 PetscScalar *vals,*svals; 2900 MPI_Comm comm; 2901 PetscErrorCode ierr; 2902 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2903 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2904 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2905 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2906 PetscInt cend,cstart,n,*rowners; 2907 int fd; 2908 PetscInt bs = newMat->rmap->bs; 2909 2910 PetscFunctionBegin; 2911 /* force binary viewer to load .info file if it has not yet done so */ 2912 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2913 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2914 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2915 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2916 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2917 if (!rank) { 2918 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2919 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2920 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2921 } 2922 2923 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2924 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the 
matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2925 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2926 if (bs < 0) bs = 1; 2927 2928 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2929 M = header[1]; N = header[2]; 2930 2931 /* If global sizes are set, check if they are consistent with that given in the file */ 2932 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2933 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2934 2935 /* determine ownership of all (block) rows */ 2936 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2937 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2938 else m = newMat->rmap->n; /* Set by user */ 2939 2940 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2941 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2942 2943 /* First process needs enough room for process with most rows */ 2944 if (!rank) { 2945 mmax = rowners[1]; 2946 for (i=2; i<=size; i++) { 2947 mmax = PetscMax(mmax, rowners[i]); 2948 } 2949 } else mmax = -1; /* unused, but compilers complain */ 2950 2951 rowners[0] = 0; 2952 for (i=2; i<=size; i++) { 2953 rowners[i] += rowners[i-1]; 2954 } 2955 rstart = rowners[rank]; 2956 rend = rowners[rank+1]; 2957 2958 /* distribute row lengths to all processors */ 2959 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2960 if (!rank) { 2961 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2962 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2963 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2964 for (j=0; j<m; j++) { 2965 procsnz[0] += ourlens[j]; 2966 } 2967 for (i=1; i<size; i++) { 2968 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2969 /* calculate the number of nonzeros on each processor */ 2970 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2971 procsnz[i] += rowlengths[j]; 2972 } 2973 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2974 } 2975 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2976 } else { 2977 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2978 } 2979 2980 if (!rank) { 2981 /* determine max buffer needed and allocate it */ 2982 maxnz = 0; 2983 for (i=0; i<size; i++) { 2984 maxnz = PetscMax(maxnz,procsnz[i]); 2985 } 2986 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2987 2988 /* read in my part of the matrix column indices */ 2989 nz = procsnz[0]; 2990 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2991 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2992 2993 /* read in every one elses and ship off */ 2994 for (i=1; i<size; i++) { 2995 nz = procsnz[i]; 2996 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2997 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2998 } 2999 ierr = PetscFree(cols);CHKERRQ(ierr); 3000 } else { 3001 /* determine buffer space needed for message */ 3002 nz = 0; 3003 for (i=0; i<m; i++) { 3004 nz += ourlens[i]; 3005 } 3006 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3007 3008 /* receive message of column indices*/ 3009 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3010 } 3011 3012 /* 
determine column ownership if matrix is not square */ 3013 if (N != M) { 3014 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3015 else n = newMat->cmap->n; 3016 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3017 cstart = cend - n; 3018 } else { 3019 cstart = rstart; 3020 cend = rend; 3021 n = cend - cstart; 3022 } 3023 3024 /* loop over local rows, determining number of off diagonal entries */ 3025 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3026 jj = 0; 3027 for (i=0; i<m; i++) { 3028 for (j=0; j<ourlens[i]; j++) { 3029 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3030 jj++; 3031 } 3032 } 3033 3034 for (i=0; i<m; i++) { 3035 ourlens[i] -= offlens[i]; 3036 } 3037 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3038 3039 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3040 3041 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3042 3043 for (i=0; i<m; i++) { 3044 ourlens[i] += offlens[i]; 3045 } 3046 3047 if (!rank) { 3048 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3049 3050 /* read in my part of the matrix numerical values */ 3051 nz = procsnz[0]; 3052 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3053 3054 /* insert into matrix */ 3055 jj = rstart; 3056 smycols = mycols; 3057 svals = vals; 3058 for (i=0; i<m; i++) { 3059 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3060 smycols += ourlens[i]; 3061 svals += ourlens[i]; 3062 jj++; 3063 } 3064 3065 /* read in other processors and ship out */ 3066 for (i=1; i<size; i++) { 3067 nz = procsnz[i]; 3068 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3069 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3070 } 3071 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3072 } else { 3073 /* receive numeric values */ 3074 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3075 3076 /* receive message of values*/ 3077 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3078 3079 /* insert into matrix */ 3080 jj = rstart; 3081 smycols = mycols; 3082 svals = vals; 3083 for (i=0; i<m; i++) { 3084 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3085 smycols += ourlens[i]; 3086 svals += ourlens[i]; 3087 jj++; 3088 } 3089 } 3090 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3091 ierr = PetscFree(vals);CHKERRQ(ierr); 3092 ierr = PetscFree(mycols);CHKERRQ(ierr); 3093 ierr = PetscFree(rowners);CHKERRQ(ierr); 3094 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3095 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3096 PetscFunctionReturn(0); 3097 } 3098 3099 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3100 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3101 { 3102 PetscErrorCode ierr; 3103 IS iscol_local; 3104 PetscBool isstride; 3105 PetscMPIInt lisstride=0,gisstride; 3106 3107 PetscFunctionBegin; 3108 /* check if we are grabbing all columns*/ 3109 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3110 3111 if (isstride) { 3112 PetscInt start,len,mstart,mlen; 3113 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3114 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3115 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3116 if (mstart == start && mlen-mstart == len) lisstride = 1; 3117 } 3118 3119 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3120 if (gisstride) { 3121 PetscInt N; 3122 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3123 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3124 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3125 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3126 } else { 3127 PetscInt cbs; 3128 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3129 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3130 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3131 } 3132 3133 *isseq = iscol_local; 3134 PetscFunctionReturn(0); 3135 } 3136 3137 /* 3138 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3139 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3140 3141 Input Parameters: 3142 mat - matrix 3143 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3144 i.e., mat->rstart <= isrow[i] < mat->rend 3145 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3146 i.e., mat->cstart <= iscol[i] < mat->cend 3147 Output Parameter: 3148 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3149 iscol_o - sequential column index set for retrieving mat->B 3150 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3151 */ 3152 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3153 { 3154 PetscErrorCode ierr; 3155 Vec x,cmap; 3156 const PetscInt *is_idx; 3157 PetscScalar *xarray,*cmaparray; 3158 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3159 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3160 Mat B=a->B; 3161 Vec lvec=a->lvec,lcmap; 3162 PetscInt i,cstart,cend,Bn=B->cmap->N; 3163 MPI_Comm comm; 3164 VecScatter Mvctx=a->Mvctx; 3165 3166 PetscFunctionBegin; 3167 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3168 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3169 3170 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3171 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3172 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3173 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3174 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3175 3176 /* Get start indices */ 3177 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3178 isstart -= ncols; 3179 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3180 3181 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3182 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3183 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3184 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3185 for (i=0; i<ncols; i++) { 3186 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3187 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3188 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3189 } 3190 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3191 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3192 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3193 3194 /* Get iscol_d */ 3195 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3196 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3197 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3198 3199 /* Get isrow_d */ 3200 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3201 rstart = mat->rmap->rstart; 3202 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3203 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3204 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3205 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3206 3207 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3208 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3209 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3210 3211 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3212 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3213 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3214 3215 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3216 3217 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3218 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3219 3220 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3221 /* off-process column indices */ 3222 count = 0; 3223 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3224 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3225 3226 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3227 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3228 for (i=0; i<Bn; i++) { 3229 if (PetscRealPart(xarray[i]) > -1.0) { 3230 idx[count] = i; /* local column index in off-diagonal part B */ 3231 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3232 count++; 3233 } 3234 } 3235 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3236 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3237 3238 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3239 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3240 3241 ierr = PetscFree(idx);CHKERRQ(ierr); 3242 *garray = cmap1; 3243 3244 ierr = VecDestroy(&x);CHKERRQ(ierr); 3245 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3246 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3247 PetscFunctionReturn(0); 3248 } 3249 3250 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3251 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3252 { 3253 PetscErrorCode ierr; 3254 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3255 Mat M = NULL; 3256 MPI_Comm comm; 3257 IS iscol_d,isrow_d,iscol_o; 3258 Mat Asub = NULL,Bsub = NULL; 3259 PetscInt n; 3260 3261 PetscFunctionBegin; 3262 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3263 3264 if (call == MAT_REUSE_MATRIX) { 3265 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3266 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3267 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3268 3269 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3270 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3271 3272 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3273 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3274 3275 /* Update diagonal and off-diagonal portions of submat */ 3276 asub = (Mat_MPIAIJ*)(*submat)->data; 3277 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3278 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3279 if (n) { 3280 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3281 } 3282 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3283 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3284 3285 } else { /* call == MAT_INITIAL_MATRIX) */ 3286 const PetscInt *garray; 3287 PetscInt BsubN; 3288 3289 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3290 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3291 3292 /* Create local submatrices Asub and Bsub */ 3293 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3294 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3295 3296 /* Create submatrix M */ 3297 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3298 3299 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3300 asub = (Mat_MPIAIJ*)M->data; 3301 3302 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3303 n = asub->B->cmap->N; 3304 if (BsubN > n) { 3305 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3306 const PetscInt *idx; 3307 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3308 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3309 3310 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3311 j = 0; 3312 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3313 for (i=0; i<n; i++) { 3314 if (j >= BsubN) break; 3315 while (subgarray[i] > garray[j]) j++; 3316 3317 if (subgarray[i] == garray[j]) { 3318 idx_new[i] = idx[j++]; 3319 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3320 } 3321 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3322 3323 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3324 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3325 3326 } else if (BsubN < n) { 3327 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3328 } 3329 3330 ierr = PetscFree(garray);CHKERRQ(ierr); 3331 *submat = M; 3332 3333 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3334 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3335 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3336 3337 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3338 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3339 3340 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3341 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3342 } 3343 PetscFunctionReturn(0); 3344 } 3345 3346 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3347 { 3348 PetscErrorCode ierr; 3349 IS iscol_local=NULL,isrow_d; 3350 PetscInt csize; 3351 PetscInt n,i,j,start,end; 3352 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3353 MPI_Comm comm; 3354 3355 PetscFunctionBegin; 3356 /* If isrow has same processor distribution as mat, 3357 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3358 if (call == MAT_REUSE_MATRIX) { 3359 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3360 if (isrow_d) { 3361 sameRowDist = PETSC_TRUE; 3362 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3363 } else { 3364 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3365 if (iscol_local) { 3366 sameRowDist = PETSC_TRUE; 3367 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3368 } 3369 } 3370 } else { 3371 /* Check if isrow has same processor distribution as mat */ 3372 sameDist[0] 
= PETSC_FALSE; 3373 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3374 if (!n) { 3375 sameDist[0] = PETSC_TRUE; 3376 } else { 3377 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3378 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3379 if (i >= start && j < end) { 3380 sameDist[0] = PETSC_TRUE; 3381 } 3382 } 3383 3384 /* Check if iscol has same processor distribution as mat */ 3385 sameDist[1] = PETSC_FALSE; 3386 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3387 if (!n) { 3388 sameDist[1] = PETSC_TRUE; 3389 } else { 3390 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3391 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3392 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3393 } 3394 3395 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3396 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3397 sameRowDist = tsameDist[0]; 3398 } 3399 3400 if (sameRowDist) { 3401 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3402 /* isrow and iscol have same processor distribution as mat */ 3403 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3404 PetscFunctionReturn(0); 3405 } else { /* sameRowDist */ 3406 /* isrow has same processor distribution as mat */ 3407 if (call == MAT_INITIAL_MATRIX) { 3408 PetscBool sorted; 3409 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3410 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3411 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3412 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3413 3414 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3415 if (sorted) { 3416 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3417 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3418 PetscFunctionReturn(0); 3419 } 3420 } else { /* call == MAT_REUSE_MATRIX */ 3421 IS iscol_sub; 3422 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3423 if (iscol_sub) { 3424 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3425 PetscFunctionReturn(0); 3426 } 3427 } 3428 } 3429 } 3430 3431 /* General case: iscol -> iscol_local which has global size of iscol */ 3432 if (call == MAT_REUSE_MATRIX) { 3433 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3434 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3435 } else { 3436 if (!iscol_local) { 3437 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3438 } 3439 } 3440 3441 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3442 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3443 3444 if (call == MAT_INITIAL_MATRIX) { 3445 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3446 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3447 } 3448 PetscFunctionReturn(0); 3449 } 3450 3451 /*@C 3452 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3453 and "off-diagonal" part of the matrix in CSR format. 3454 3455 Collective on MPI_Comm 3456 3457 Input Parameters: 3458 + comm - MPI communicator 3459 . 
A - "diagonal" portion of matrix 3460 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3461 - garray - global index of B columns 3462 3463 Output Parameter: 3464 . mat - the matrix, with input A as its local diagonal matrix 3465 Level: advanced 3466 3467 Notes: 3468 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3469 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3470 3471 .seealso: MatCreateMPIAIJWithSplitArrays() 3472 @*/ 3473 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3474 { 3475 PetscErrorCode ierr; 3476 Mat_MPIAIJ *maij; 3477 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3478 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3479 PetscScalar *oa=b->a; 3480 Mat Bnew; 3481 PetscInt m,n,N; 3482 3483 PetscFunctionBegin; 3484 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3485 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3486 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3487 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3488 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3489 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3490 3491 /* Get global columns of mat */ 3492 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3493 3494 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3495 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3496 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3497 maij = (Mat_MPIAIJ*)(*mat)->data; 3498 3499 (*mat)->preallocated = PETSC_TRUE; 3500 3501 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3502 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3503 3504 /* Set A as diagonal portion of *mat */ 3505 maij->A = A; 3506 3507 nz = oi[m]; 3508 for (i=0; i<nz; i++) { 3509 col = oj[i]; 3510 oj[i] = garray[col]; 3511 } 3512 3513 /* Set Bnew as off-diagonal portion of *mat */ 3514 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3515 bnew = (Mat_SeqAIJ*)Bnew->data; 3516 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3517 maij->B = Bnew; 3518 3519 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3520 3521 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3522 b->free_a = PETSC_FALSE; 3523 b->free_ij = PETSC_FALSE; 3524 ierr = MatDestroy(&B);CHKERRQ(ierr); 3525 3526 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3527 bnew->free_a = PETSC_TRUE; 3528 bnew->free_ij = PETSC_TRUE; 3529 3530 /* condense columns of maij->B */ 3531 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3532 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3533 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3534 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3535 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3536 PetscFunctionReturn(0); 3537 } 3538 3539 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3540 
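/*
   A minimal usage sketch for MatCreateMPIAIJWithSeqAIJ() (illustration only, not part of the library).
   It assumes exactly two MPI ranks, each owning 2 rows and 2 "diagonal" columns of a 4x4 matrix with a
   single off-process nonzero column per rank; the values and garray contents below are made up and error
   checking is omitted.

     Mat         Aloc,Bloc,C;
     PetscMPIInt rank;
     PetscInt    garray[1];

     MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
     garray[0] = rank ? 1 : 2;                               // global index of this rank's one off-process column
     MatCreateSeqAIJ(PETSC_COMM_SELF,2,2,2,NULL,&Aloc);      // local "diagonal" block (2x2)
     MatCreateSeqAIJ(PETSC_COMM_SELF,2,1,1,NULL,&Bloc);      // "off-diagonal" block with one nonzero column
     MatSetValue(Aloc,0,0,1.0,INSERT_VALUES);
     MatSetValue(Aloc,1,1,1.0,INSERT_VALUES);
     MatSetValue(Bloc,0,0,2.0,INSERT_VALUES);
     MatAssemblyBegin(Aloc,MAT_FINAL_ASSEMBLY); MatAssemblyEnd(Aloc,MAT_FINAL_ASSEMBLY);
     MatAssemblyBegin(Bloc,MAT_FINAL_ASSEMBLY); MatAssemblyEnd(Bloc,MAT_FINAL_ASSEMBLY);
     MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,garray,&C);
     // Aloc is absorbed into C and Bloc is destroyed by the call; neither may be used afterwards
     MatDestroy(&C);
*/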
3541 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3542 { 3543 PetscErrorCode ierr; 3544 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3545 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3546 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3547 Mat M,Msub,B=a->B; 3548 MatScalar *aa; 3549 Mat_SeqAIJ *aij; 3550 PetscInt *garray = a->garray,*colsub,Ncols; 3551 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3552 IS iscol_sub,iscmap; 3553 const PetscInt *is_idx,*cmap; 3554 PetscBool allcolumns=PETSC_FALSE; 3555 MPI_Comm comm; 3556 3557 PetscFunctionBegin; 3558 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3559 3560 if (call == MAT_REUSE_MATRIX) { 3561 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3562 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3563 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3564 3565 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3566 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3567 3568 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3569 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3570 3571 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3572 3573 } else { /* call == MAT_INITIAL_MATRIX) */ 3574 PetscBool flg; 3575 3576 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3577 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3578 3579 /* (1) iscol -> nonscalable iscol_local */ 3580 /* Check for special case: each processor gets entire matrix columns */ 3581 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3582 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3583 if (allcolumns) { 3584 iscol_sub = iscol_local; 3585 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3586 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3587 3588 } else { 3589 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3590 PetscInt *idx,*cmap1,k; 3591 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3592 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3593 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3594 count = 0; 3595 k = 0; 3596 for (i=0; i<Ncols; i++) { 3597 j = is_idx[i]; 3598 if (j >= cstart && j < cend) { 3599 /* diagonal part of mat */ 3600 idx[count] = j; 3601 cmap1[count++] = i; /* column index in submat */ 3602 } else if (Bn) { 3603 /* off-diagonal part of mat */ 3604 if (j == garray[k]) { 3605 idx[count] = j; 3606 cmap1[count++] = i; /* column index in submat */ 3607 } else if (j > garray[k]) { 3608 while (j > garray[k] && k < Bn-1) k++; 3609 if (j == garray[k]) { 3610 idx[count] = j; 3611 cmap1[count++] = i; /* column index in submat */ 3612 } 3613 } 3614 } 3615 } 3616 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3617 3618 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3619 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3620 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3621 3622 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3623 } 3624 3625 /* (3) Create sequential Msub */ 3626 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3627 } 3628 3629 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3630 aij = (Mat_SeqAIJ*)(Msub)->data; 3631 ii = aij->i; 3632 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3633 3634 /* 3635 m - number of local rows 3636 Ncols - number of columns (same on all processors) 3637 rstart - first row in new global matrix generated 3638 */ 3639 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3640 3641 if (call == MAT_INITIAL_MATRIX) { 3642 /* (4) Create parallel newmat */ 3643 PetscMPIInt rank,size; 3644 PetscInt csize; 3645 3646 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3647 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3648 3649 /* 3650 Determine the number of non-zeros in the diagonal and off-diagonal 3651 portions of the matrix in order to do correct preallocation 3652 */ 3653 3654 /* first get start and end of "diagonal" columns */ 3655 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3656 if (csize == PETSC_DECIDE) { 3657 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3658 if (mglobal == Ncols) { /* square matrix */ 3659 nlocal = m; 3660 } else { 3661 nlocal = Ncols/size + ((Ncols % size) > rank); 3662 } 3663 } else { 3664 nlocal = csize; 3665 } 3666 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3667 rstart = rend - nlocal; 3668 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3669 3670 /* next, compute all the lengths */ 3671 jj = aij->j; 3672 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3673 olens = dlens + m; 3674 for (i=0; i<m; i++) { 3675 jend = ii[i+1] - ii[i]; 3676 olen = 0; 3677 dlen = 0; 3678 for (j=0; j<jend; j++) { 3679 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3680 else dlen++; 3681 jj++; 3682 } 3683 olens[i] = olen; 3684 dlens[i] = dlen; 3685 } 3686 3687 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3688 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3689 3690 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3691 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
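    /* Give the new parallel matrix the block sizes of the row/column index sets, the same type as the input mat,
       and the diagonal/off-diagonal row lengths computed above */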
3692 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3693 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3694 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3695 ierr = PetscFree(dlens);CHKERRQ(ierr); 3696 3697 } else { /* call == MAT_REUSE_MATRIX */ 3698 M = *newmat; 3699 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3700 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3701 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3702 /* 3703 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3704 rather than the slower MatSetValues(). 3705 */ 3706 M->was_assembled = PETSC_TRUE; 3707 M->assembled = PETSC_FALSE; 3708 } 3709 3710 /* (5) Set values of Msub to *newmat */ 3711 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3712 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3713 3714 jj = aij->j; 3715 aa = aij->a; 3716 for (i=0; i<m; i++) { 3717 row = rstart + i; 3718 nz = ii[i+1] - ii[i]; 3719 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3720 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3721 jj += nz; aa += nz; 3722 } 3723 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3724 3725 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3726 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3727 3728 ierr = PetscFree(colsub);CHKERRQ(ierr); 3729 3730 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3731 if (call == MAT_INITIAL_MATRIX) { 3732 *newmat = M; 3733 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3734 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3735 3736 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3737 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3738 3739 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3740 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3741 3742 if (iscol_local) { 3743 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3744 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3745 } 3746 } 3747 PetscFunctionReturn(0); 3748 } 3749 3750 /* 3751 Not great since it makes two copies of the submatrix, first an SeqAIJ 3752 in local and then by concatenating the local matrices the end result. 3753 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3754 3755 Note: This requires a sequential iscol with all indices. 
3756 */ 3757 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3758 { 3759 PetscErrorCode ierr; 3760 PetscMPIInt rank,size; 3761 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3762 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3763 Mat M,Mreuse; 3764 MatScalar *aa,*vwork; 3765 MPI_Comm comm; 3766 Mat_SeqAIJ *aij; 3767 PetscBool colflag,allcolumns=PETSC_FALSE; 3768 3769 PetscFunctionBegin; 3770 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3771 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3772 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3773 3774 /* Check for special case: each processor gets entire matrix columns */ 3775 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3776 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3777 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3778 3779 if (call == MAT_REUSE_MATRIX) { 3780 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3781 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3782 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3783 } else { 3784 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3785 } 3786 3787 /* 3788 m - number of local rows 3789 n - number of columns (same on all processors) 3790 rstart - first row in new global matrix generated 3791 */ 3792 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3793 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3794 if (call == MAT_INITIAL_MATRIX) { 3795 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3796 ii = aij->i; 3797 jj = aij->j; 3798 3799 /* 3800 Determine the number of non-zeros in the diagonal and off-diagonal 3801 portions of the matrix in order to do correct preallocation 3802 */ 3803 3804 /* first get start and end of "diagonal" columns */ 3805 if (csize == PETSC_DECIDE) { 3806 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3807 if (mglobal == n) { /* square matrix */ 3808 nlocal = m; 3809 } else { 3810 nlocal = n/size + ((n % size) > rank); 3811 } 3812 } else { 3813 nlocal = csize; 3814 } 3815 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3816 rstart = rend - nlocal; 3817 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3818 3819 /* next, compute all the lengths */ 3820 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3821 olens = dlens + m; 3822 for (i=0; i<m; i++) { 3823 jend = ii[i+1] - ii[i]; 3824 olen = 0; 3825 dlen = 0; 3826 for (j=0; j<jend; j++) { 3827 if (*jj < rstart || *jj >= rend) olen++; 3828 else dlen++; 3829 jj++; 3830 } 3831 olens[i] = olen; 3832 dlens[i] = dlen; 3833 } 3834 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3835 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3836 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3837 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3838 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3839 ierr = PetscFree(dlens);CHKERRQ(ierr); 3840 } else { 3841 PetscInt ml,nl; 3842 3843 M = *newmat; 3844 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3845 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3846 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3847 /* 3848 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3849 rather than the slower MatSetValues(). 3850 */ 3851 M->was_assembled = PETSC_TRUE; 3852 M->assembled = PETSC_FALSE; 3853 } 3854 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3855 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3856 ii = aij->i; 3857 jj = aij->j; 3858 aa = aij->a; 3859 for (i=0; i<m; i++) { 3860 row = rstart + i; 3861 nz = ii[i+1] - ii[i]; 3862 cwork = jj; jj += nz; 3863 vwork = aa; aa += nz; 3864 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3865 } 3866 3867 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3868 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3869 *newmat = M; 3870 3871 /* save submatrix used in processor for next request */ 3872 if (call == MAT_INITIAL_MATRIX) { 3873 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3874 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3875 } 3876 PetscFunctionReturn(0); 3877 } 3878 3879 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3880 { 3881 PetscInt m,cstart, cend,j,nnz,i,d; 3882 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3883 const PetscInt *JJ; 3884 PetscScalar *values; 3885 PetscErrorCode ierr; 3886 PetscBool nooffprocentries; 3887 3888 PetscFunctionBegin; 3889 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3890 3891 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3892 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3893 m = B->rmap->n; 3894 cstart = B->cmap->rstart; 3895 cend = B->cmap->rend; 3896 rstart = B->rmap->rstart; 3897 3898 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3899 3900 #if defined(PETSC_USE_DEBUG) 3901 for (i=0; i<m; i++) { 3902 nnz = Ii[i+1]- Ii[i]; 3903 JJ = J + Ii[i]; 3904 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3905 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3906 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3907 } 3908 #endif 3909 3910 for (i=0; i<m; i++) { 3911 nnz = Ii[i+1]- Ii[i]; 3912 JJ = J + Ii[i]; 3913 nnz_max = PetscMax(nnz_max,nnz); 3914 d = 0; 3915 for (j=0; j<nnz; j++) { 3916 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3917 } 3918 d_nnz[i] = d; 3919 o_nnz[i] = nnz - d; 3920 } 3921 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3922 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3923 3924 if (v) values = (PetscScalar*)v; 3925 else { 3926 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3927 } 3928 3929 for (i=0; i<m; i++) { 3930 ii = i + rstart; 3931 nnz = Ii[i+1]- Ii[i]; 3932 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3933 } 3934 nooffprocentries = B->nooffprocentries; 3935 B->nooffprocentries = PETSC_TRUE; 3936 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3937 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3938 B->nooffprocentries = nooffprocentries; 3939 3940 if (!v) { 3941 ierr = PetscFree(values);CHKERRQ(ierr); 3942 } 3943 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3944 PetscFunctionReturn(0); 3945 } 3946 3947 /*@ 3948 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3949 (the default parallel PETSc format). 3950 3951 Collective on MPI_Comm 3952 3953 Input Parameters: 3954 + B - the matrix 3955 . i - the indices into j for the start of each local row (starts with zero) 3956 . j - the column indices for each local row (starts with zero) 3957 - v - optional values in the matrix 3958 3959 Level: developer 3960 3961 Notes: 3962 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3963 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3964 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3965 3966 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3967 3968 The format which is used for the sparse matrix input, is equivalent to a 3969 row-major ordering.. i.e for the following matrix, the input data expected is 3970 as shown 3971 3972 $ 1 0 0 3973 $ 2 0 3 P0 3974 $ ------- 3975 $ 4 5 6 P1 3976 $ 3977 $ Process0 [P0]: rows_owned=[0,1] 3978 $ i = {0,1,3} [size = nrow+1 = 2+1] 3979 $ j = {0,0,2} [size = 3] 3980 $ v = {1,2,3} [size = 3] 3981 $ 3982 $ Process1 [P1]: rows_owned=[2] 3983 $ i = {0,3} [size = nrow+1 = 1+1] 3984 $ j = {0,1,2} [size = 3] 3985 $ v = {4,5,6} [size = 3] 3986 3987 .keywords: matrix, aij, compressed row, sparse, parallel 3988 3989 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3990 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3991 @*/ 3992 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3993 { 3994 PetscErrorCode ierr; 3995 3996 PetscFunctionBegin; 3997 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3998 PetscFunctionReturn(0); 3999 } 4000 4001 /*@C 4002 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4003 (the default parallel PETSc format). For good matrix assembly performance 4004 the user should preallocate the matrix storage by setting the parameters 4005 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4006 performance can be increased by more than a factor of 50. 4007 4008 Collective on MPI_Comm 4009 4010 Input Parameters: 4011 + B - the matrix 4012 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4013 (same value is used for all local rows) 4014 . d_nnz - array containing the number of nonzeros in the various rows of the 4015 DIAGONAL portion of the local submatrix (possibly different for each row) 4016 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4017 The size of this array is equal to the number of local rows, i.e 'm'. 
4018 For matrices that will be factored, you must leave room for (and set) 4019 the diagonal entry even if it is zero. 4020 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4021 submatrix (same value is used for all local rows). 4022 - o_nnz - array containing the number of nonzeros in the various rows of the 4023 OFF-DIAGONAL portion of the local submatrix (possibly different for 4024 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4025 structure. The size of this array is equal to the number 4026 of local rows, i.e., 'm'. 4027 4028 If the *_nnz parameter is given then the *_nz parameter is ignored. 4029 4030 The AIJ format (also called the Yale sparse matrix format or 4031 compressed row storage (CSR)) is fully compatible with standard Fortran 77 4032 storage. The stored row and column indices begin with zero. 4033 See Users-Manual: ch_mat for details. 4034 4035 The parallel matrix is partitioned such that the first m0 rows belong to 4036 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4037 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 4038 4039 The DIAGONAL portion of the local submatrix of a processor can be defined 4040 as the submatrix which is obtained by extracting the part corresponding to 4041 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4042 first row that belongs to the processor, r2 is the last row belonging to 4043 this processor, and c1-c2 is the range of indices of the local part of a 4044 vector suitable for applying the matrix to. This is an mxn matrix. In the 4045 common case of a square matrix, the row and column ranges are the same and 4046 the DIAGONAL part is also square. The remaining portion of the local 4047 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 4048 4049 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4050 4051 You can call MatGetInfo() to get information on how effective the preallocation was; 4052 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4053 You can also run with the option -info and look for messages with the string 4054 malloc in them to see if additional memory allocation was needed. 4055 4056 Example usage: 4057 4058 Consider the following 8x8 matrix with 34 non-zero values, that is 4059 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4060 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4061 as follows: 4062 4063 .vb 4064 1 2 0 | 0 3 0 | 0 4 4065 Proc0 0 5 6 | 7 0 0 | 8 0 4066 9 0 10 | 11 0 0 | 12 0 4067 ------------------------------------- 4068 13 0 14 | 15 16 17 | 0 0 4069 Proc1 0 18 0 | 19 20 21 | 0 0 4070 0 0 0 | 22 23 0 | 24 0 4071 ------------------------------------- 4072 Proc2 25 26 27 | 0 0 28 | 29 0 4073 30 0 0 | 31 32 33 | 0 34 4074 .ve 4075 4076 This can be represented as a collection of submatrices as: 4077 4078 .vb 4079 A B C 4080 D E F 4081 G H I 4082 .ve 4083 4084 Where the submatrices A,B,C are owned by proc0, D,E,F are 4085 owned by proc1, G,H,I are owned by proc2. 4086 4087 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4088 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4089 The 'M','N' parameters are 8,8, and have the same values on all procs. 4090 4091 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4092 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4093 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4094 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4095 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ 4096 matrix and [DF] as another SeqAIJ matrix. 4097 4098 When d_nz, o_nz parameters are specified, d_nz storage elements are 4099 allocated for every row of the local diagonal submatrix, and o_nz 4100 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4101 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4102 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4103 In this case, the values of d_nz,o_nz are: 4104 .vb 4105 proc0 : d_nz = 2, o_nz = 2 4106 proc1 : d_nz = 3, o_nz = 2 4107 proc2 : d_nz = 1, o_nz = 4 4108 .ve 4109 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4110 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4111 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4112 34 values. 4113 4114 When d_nnz, o_nnz parameters are specified, the storage is specified 4115 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4116 In the above case the values for d_nnz,o_nnz are: 4117 .vb 4118 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4119 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4120 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4121 .ve 4122 Here the space allocated is the sum of all the above values, i.e., 34, and 4123 hence pre-allocation is perfect. 4124 4125 Level: intermediate 4126 4127 .keywords: matrix, aij, compressed row, sparse, parallel 4128 4129 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4130 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4131 @*/ 4132 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4133 { 4134 PetscErrorCode ierr; 4135 4136 PetscFunctionBegin; 4137 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4138 PetscValidType(B,1); 4139 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4140 PetscFunctionReturn(0); 4141 } 4142 4143 /*@ 4144 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4145 CSR format. 4146 4147 Collective on MPI_Comm 4148 4149 Input Parameters: 4150 + comm - MPI communicator 4151 . m - number of local rows (Cannot be PETSC_DECIDE) 4152 . n - This value should be the same as the local size used in creating the 4153 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4154 calculated if N is given) For square matrices n is almost always m. 4155 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4156 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4157 . i - row indices; the indices into j and a for the start of each local row (starts with zero) 4158 . j - column indices for each local row (starts with zero) 4159 - a - matrix values 4160 4161 Output Parameter: 4162 . mat - the matrix 4163 4164 Level: intermediate 4165 4166 Notes: 4167 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4168 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4169 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4170 4171 The i and j indices are 0 based, and the i entries are offsets into the local j (and a) arrays.
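   As an illustration (a sketch only, with error checking omitted; nlocalrows, N, i, j, and a stand for
   this rank's number of local rows, the global number of columns, and the local CSR arrays), each rank
   simply passes its own rows, laid out as in the example that follows:

$     Mat mat;
$     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,nlocalrows,PETSC_DECIDE,PETSC_DETERMINE,N,i,j,a,&mat);CHKERRQ(ierr);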
4172 4173 The format which is used for the sparse matrix input, is equivalent to a 4174 row-major ordering.. i.e for the following matrix, the input data expected is 4175 as shown 4176 4177 $ 1 0 0 4178 $ 2 0 3 P0 4179 $ ------- 4180 $ 4 5 6 P1 4181 $ 4182 $ Process0 [P0]: rows_owned=[0,1] 4183 $ i = {0,1,3} [size = nrow+1 = 2+1] 4184 $ j = {0,0,2} [size = 3] 4185 $ v = {1,2,3} [size = 3] 4186 $ 4187 $ Process1 [P1]: rows_owned=[2] 4188 $ i = {0,3} [size = nrow+1 = 1+1] 4189 $ j = {0,1,2} [size = 3] 4190 $ v = {4,5,6} [size = 3] 4191 4192 .keywords: matrix, aij, compressed row, sparse, parallel 4193 4194 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4195 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4196 @*/ 4197 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4198 { 4199 PetscErrorCode ierr; 4200 4201 PetscFunctionBegin; 4202 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4203 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4204 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4205 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4206 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4207 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4208 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4209 PetscFunctionReturn(0); 4210 } 4211 4212 /*@C 4213 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4214 (the default parallel PETSc format). For good matrix assembly performance 4215 the user should preallocate the matrix storage by setting the parameters 4216 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4217 performance can be increased by more than a factor of 50. 4218 4219 Collective on MPI_Comm 4220 4221 Input Parameters: 4222 + comm - MPI communicator 4223 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4224 This value should be the same as the local size used in creating the 4225 y vector for the matrix-vector product y = Ax. 4226 . n - This value should be the same as the local size used in creating the 4227 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4228 calculated if N is given) For square matrices n is almost always m. 4229 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4230 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4231 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4232 (same value is used for all local rows) 4233 . d_nnz - array containing the number of nonzeros in the various rows of the 4234 DIAGONAL portion of the local submatrix (possibly different for each row) 4235 or NULL, if d_nz is used to specify the nonzero structure. 4236 The size of this array is equal to the number of local rows, i.e 'm'. 4237 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4238 submatrix (same value is used for all local rows). 4239 - o_nnz - array containing the number of nonzeros in the various rows of the 4240 OFF-DIAGONAL portion of the local submatrix (possibly different for 4241 each row) or NULL, if o_nz is used to specify the nonzero 4242 structure. 
The size of this array is equal to the number 4243 of local rows, i.e 'm'. 4244 4245 Output Parameter: 4246 . A - the matrix 4247 4248 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4249 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4250 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4251 4252 Notes: 4253 If the *_nnz parameter is given then the *_nz parameter is ignored 4254 4255 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4256 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4257 storage requirements for this matrix. 4258 4259 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4260 processor than it must be used on all processors that share the object for 4261 that argument. 4262 4263 The user MUST specify either the local or global matrix dimensions 4264 (possibly both). 4265 4266 The parallel matrix is partitioned across processors such that the 4267 first m0 rows belong to process 0, the next m1 rows belong to 4268 process 1, the next m2 rows belong to process 2 etc.. where 4269 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4270 values corresponding to [m x N] submatrix. 4271 4272 The columns are logically partitioned with the n0 columns belonging 4273 to 0th partition, the next n1 columns belonging to the next 4274 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4275 4276 The DIAGONAL portion of the local submatrix on any given processor 4277 is the submatrix corresponding to the rows and columns m,n 4278 corresponding to the given processor. i.e diagonal matrix on 4279 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4280 etc. The remaining portion of the local submatrix [m x (N-n)] 4281 constitute the OFF-DIAGONAL portion. The example below better 4282 illustrates this concept. 4283 4284 For a square global matrix we define each processor's diagonal portion 4285 to be its local rows and the corresponding columns (a square submatrix); 4286 each processor's off-diagonal portion encompasses the remainder of the 4287 local matrix (a rectangular submatrix). 4288 4289 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4290 4291 When calling this routine with a single process communicator, a matrix of 4292 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4293 type of communicator, use the construction mechanism 4294 .vb 4295 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4296 .ve 4297 4298 $ MatCreate(...,&A); 4299 $ MatSetType(A,MATMPIAIJ); 4300 $ MatSetSizes(A, m,n,M,N); 4301 $ MatMPIAIJSetPreallocation(A,...); 4302 4303 By default, this format uses inodes (identical nodes) when possible. 4304 We search for consecutive rows with the same nonzero structure, thereby 4305 reusing matrix information to achieve increased efficiency. 4306 4307 Options Database Keys: 4308 + -mat_no_inode - Do not use inodes 4309 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4310 4311 4312 4313 Example usage: 4314 4315 Consider the following 8x8 matrix with 34 non-zero values, that is 4316 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4317 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4318 as follows 4319 4320 .vb 4321 1 2 0 | 0 3 0 | 0 4 4322 Proc0 0 5 6 | 7 0 0 | 8 0 4323 9 0 10 | 11 0 0 | 12 0 4324 ------------------------------------- 4325 13 0 14 | 15 16 17 | 0 0 4326 Proc1 0 18 0 | 19 20 21 | 0 0 4327 0 0 0 | 22 23 0 | 24 0 4328 ------------------------------------- 4329 Proc2 25 26 27 | 0 0 28 | 29 0 4330 30 0 0 | 31 32 33 | 0 34 4331 .ve 4332 4333 This can be represented as a collection of submatrices as 4334 4335 .vb 4336 A B C 4337 D E F 4338 G H I 4339 .ve 4340 4341 Where the submatrices A,B,C are owned by proc0, D,E,F are 4342 owned by proc1, G,H,I are owned by proc2. 4343 4344 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4345 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4346 The 'M','N' parameters are 8,8, and have the same values on all procs. 4347 4348 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4349 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4350 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4351 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4352 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 4353 matrix, and [DF] as another SeqAIJ matrix. 4354 4355 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4356 allocated for every row of the local diagonal submatrix, and o_nz 4357 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4358 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4359 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4360 In this case, the values of d_nz,o_nz are 4361 .vb 4362 proc0 : d_nz = 2, o_nz = 2 4363 proc1 : d_nz = 3, o_nz = 2 4364 proc2 : d_nz = 1, o_nz = 4 4365 .ve 4366 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4367 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4368 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4369 34 values. 4370 4371 When the d_nnz, o_nnz parameters are specified, the storage is specified 4372 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4373 In the above case the values for d_nnz,o_nnz are 4374 .vb 4375 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4376 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4377 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4378 .ve 4379 Here the space allocated is the sum of all the above values, i.e., 34, and 4380 hence the preallocation is perfect.
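   As a minimal calling sketch (not taken from a PETSc example; the communicator and local
   values shown are assumptions for illustration), process 0 of the 8x8 example above could
   create the matrix with the d_nnz/o_nnz arrays listed; the d_nz/o_nz arguments are then
   ignored, and error checking is omitted for brevity:
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};   /* process 0 owns rows 0-2 */

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd(), use A ... */
     MatDestroy(&A);
.ve
   The call is collective, so proc1 and proc2 make the matching call with their own local
   sizes and nnz arrays.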
4381 4382 Level: intermediate 4383 4384 .keywords: matrix, aij, compressed row, sparse, parallel 4385 4386 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4387 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4388 @*/ 4389 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4390 { 4391 PetscErrorCode ierr; 4392 PetscMPIInt size; 4393 4394 PetscFunctionBegin; 4395 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4396 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4397 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4398 if (size > 1) { 4399 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4400 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4401 } else { 4402 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4403 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4404 } 4405 PetscFunctionReturn(0); 4406 } 4407 4408 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4409 { 4410 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4411 PetscBool flg; 4412 PetscErrorCode ierr; 4413 4414 PetscFunctionBegin; 4415 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4416 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4417 if (Ad) *Ad = a->A; 4418 if (Ao) *Ao = a->B; 4419 if (colmap) *colmap = a->garray; 4420 PetscFunctionReturn(0); 4421 } 4422 4423 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4424 { 4425 PetscErrorCode ierr; 4426 PetscInt m,N,i,rstart,nnz,Ii; 4427 PetscInt *indx; 4428 PetscScalar *values; 4429 4430 PetscFunctionBegin; 4431 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4432 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4433 PetscInt *dnz,*onz,sum,bs,cbs; 4434 4435 if (n == PETSC_DECIDE) { 4436 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4437 } 4438 /* Check sum(n) = N */ 4439 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4440 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4441 4442 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4443 rstart -= m; 4444 4445 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4446 for (i=0; i<m; i++) { 4447 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4448 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4449 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4450 } 4451 4452 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4453 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4454 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4455 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4456 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4457 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4458 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4459 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4460 } 4461 4462 /* numeric phase */ 4463 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4464 for (i=0; i<m; i++) { 4465 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4466 Ii = i + rstart; 4467 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
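    /* hand the row obtained from MatGetRow_SeqAIJ() back before fetching the next one */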
4468 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4469 } 4470 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4471 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4472 PetscFunctionReturn(0); 4473 } 4474 4475 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4476 { 4477 PetscErrorCode ierr; 4478 PetscMPIInt rank; 4479 PetscInt m,N,i,rstart,nnz; 4480 size_t len; 4481 const PetscInt *indx; 4482 PetscViewer out; 4483 char *name; 4484 Mat B; 4485 const PetscScalar *values; 4486 4487 PetscFunctionBegin; 4488 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4489 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4490 /* Should this be the type of the diagonal block of A? */ 4491 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4492 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4493 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4494 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4495 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4496 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4497 for (i=0; i<m; i++) { 4498 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4499 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4500 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4501 } 4502 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4503 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4504 4505 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4506 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4507 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4508 sprintf(name,"%s.%d",outfile,rank); 4509 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4510 ierr = PetscFree(name);CHKERRQ(ierr); 4511 ierr = MatView(B,out);CHKERRQ(ierr); 4512 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4513 ierr = MatDestroy(&B);CHKERRQ(ierr); 4514 PetscFunctionReturn(0); 4515 } 4516 4517 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4518 { 4519 PetscErrorCode ierr; 4520 Mat_Merge_SeqsToMPI *merge; 4521 PetscContainer container; 4522 4523 PetscFunctionBegin; 4524 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4525 if (container) { 4526 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4527 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4528 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4529 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4530 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4531 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4532 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4533 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4534 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4535 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4536 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4537 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4538 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4539 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4540 ierr = PetscFree(merge);CHKERRQ(ierr); 4541 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4542 } 4543 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4544 PetscFunctionReturn(0); 4545 } 4546 4547 #include <../src/mat/utils/freespace.h> 4548 #include <petscbt.h> 4549 4550 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4551 { 4552 PetscErrorCode ierr; 4553 MPI_Comm comm; 4554 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4555 PetscMPIInt 
size,rank,taga,*len_s; 4556 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4557 PetscInt proc,m; 4558 PetscInt **buf_ri,**buf_rj; 4559 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4560 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4561 MPI_Request *s_waits,*r_waits; 4562 MPI_Status *status; 4563 MatScalar *aa=a->a; 4564 MatScalar **abuf_r,*ba_i; 4565 Mat_Merge_SeqsToMPI *merge; 4566 PetscContainer container; 4567 4568 PetscFunctionBegin; 4569 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4570 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4571 4572 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4573 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4574 4575 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4576 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4577 4578 bi = merge->bi; 4579 bj = merge->bj; 4580 buf_ri = merge->buf_ri; 4581 buf_rj = merge->buf_rj; 4582 4583 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4584 owners = merge->rowmap->range; 4585 len_s = merge->len_s; 4586 4587 /* send and recv matrix values */ 4588 /*-----------------------------*/ 4589 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4590 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4591 4592 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4593 for (proc=0,k=0; proc<size; proc++) { 4594 if (!len_s[proc]) continue; 4595 i = owners[proc]; 4596 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4597 k++; 4598 } 4599 4600 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4601 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4602 ierr = PetscFree(status);CHKERRQ(ierr); 4603 4604 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4605 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4606 4607 /* insert mat values of mpimat */ 4608 /*----------------------------*/ 4609 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4610 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4611 4612 for (k=0; k<merge->nrecv; k++) { 4613 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4614 nrows = *(buf_ri_k[k]); 4615 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4616 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4617 } 4618 4619 /* set values of ba */ 4620 m = merge->rowmap->n; 4621 for (i=0; i<m; i++) { 4622 arow = owners[rank] + i; 4623 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4624 bnzi = bi[i+1] - bi[i]; 4625 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4626 4627 /* add local non-zero vals of this proc's seqmat into ba */ 4628 anzi = ai[arow+1] - ai[arow]; 4629 aj = a->j + ai[arow]; 4630 aa = a->a + ai[arow]; 4631 nextaj = 0; 4632 for (j=0; nextaj<anzi; j++) { 4633 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4634 ba_i[j] += aa[nextaj++]; 4635 } 4636 } 4637 4638 /* add received vals into ba */ 4639 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4640 /* i-th row */ 4641 if (i == *nextrow[k]) { 4642 anzi = *(nextai[k]+1) - *nextai[k]; 4643 aj = buf_rj[k] + *(nextai[k]); 4644 aa = abuf_r[k] + *(nextai[k]); 4645 nextaj = 0; 4646 for (j=0; nextaj<anzi; j++) { 4647 if (*(bj_i + j) == aj[nextaj]) { /* bcol 
== acol */ 4648 ba_i[j] += aa[nextaj++]; 4649 } 4650 } 4651 nextrow[k]++; nextai[k]++; 4652 } 4653 } 4654 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4655 } 4656 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4657 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4658 4659 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4660 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4661 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4662 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4663 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4664 PetscFunctionReturn(0); 4665 } 4666 4667 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4668 { 4669 PetscErrorCode ierr; 4670 Mat B_mpi; 4671 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4672 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4673 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4674 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4675 PetscInt len,proc,*dnz,*onz,bs,cbs; 4676 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4677 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4678 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4679 MPI_Status *status; 4680 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4681 PetscBT lnkbt; 4682 Mat_Merge_SeqsToMPI *merge; 4683 PetscContainer container; 4684 4685 PetscFunctionBegin; 4686 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4687 4688 /* make sure it is a PETSc comm */ 4689 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4690 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4691 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4692 4693 ierr = PetscNew(&merge);CHKERRQ(ierr); 4694 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4695 4696 /* determine row ownership */ 4697 /*---------------------------------------------------------*/ 4698 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4699 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4700 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4701 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4702 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4703 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4704 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4705 4706 m = merge->rowmap->n; 4707 owners = merge->rowmap->range; 4708 4709 /* determine the number of messages to send, their lengths */ 4710 /*---------------------------------------------------------*/ 4711 len_s = merge->len_s; 4712 4713 len = 0; /* length of buf_si[] */ 4714 merge->nsend = 0; 4715 for (proc=0; proc<size; proc++) { 4716 len_si[proc] = 0; 4717 if (proc == rank) { 4718 len_s[proc] = 0; 4719 } else { 4720 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4721 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4722 } 4723 if (len_s[proc]) { 4724 merge->nsend++; 4725 nrows = 0; 4726 for (i=owners[proc]; i<owners[proc+1]; i++) { 4727 if (ai[i+1] > ai[i]) nrows++; 4728 } 4729 len_si[proc] = 2*(nrows+1); 4730 len += len_si[proc]; 4731 } 4732 } 4733 4734 /* determine the number and length of messages to receive for ij-structure */ 4735 /*-------------------------------------------------------------------------*/ 4736 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4737 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4738 4739 /* post the Irecv of j-structure */ 4740 /*-------------------------------*/ 4741 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4742 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4743 4744 /* post the Isend of j-structure */ 4745 /*--------------------------------*/ 4746 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4747 4748 for (proc=0, k=0; proc<size; proc++) { 4749 if (!len_s[proc]) continue; 4750 i = owners[proc]; 4751 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4752 k++; 4753 } 4754 4755 /* receives and sends of j-structure are complete */ 4756 /*------------------------------------------------*/ 4757 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4758 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4759 4760 /* send and recv i-structure */ 4761 /*---------------------------*/ 4762 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4763 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4764 4765 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4766 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4767 for (proc=0,k=0; proc<size; proc++) { 4768 if (!len_s[proc]) continue; 4769 /* form outgoing message for i-structure: 4770 buf_si[0]: nrows to be sent 4771 [1:nrows]: row index (global) 4772 [nrows+1:2*nrows+1]: i-structure index 4773 */ 4774 /*-------------------------------------------*/ 4775 nrows = len_si[proc]/2 - 1; 4776 buf_si_i = buf_si + nrows+1; 4777 buf_si[0] = nrows; 4778 buf_si_i[0] = 0; 4779 nrows = 0; 4780 for (i=owners[proc]; i<owners[proc+1]; i++) { 4781 anzi = ai[i+1] - ai[i]; 4782 if (anzi) { 4783 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4784 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4785 nrows++; 4786 } 4787 } 4788 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4789 k++; 4790 buf_si += len_si[proc]; 4791 } 4792 4793 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4794 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4795 4796 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4797 for (i=0; i<merge->nrecv; i++) { 4798 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4799 } 4800 4801 ierr = PetscFree(len_si);CHKERRQ(ierr); 4802 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4803 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4804 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4805 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4806 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4807 ierr = PetscFree(status);CHKERRQ(ierr); 4808 4809 /* compute a local seq matrix in each processor */ 4810 /*----------------------------------------------*/ 4811 /* allocate bi array and free space for accumulating nonzero column info */ 4812 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4813 bi[0] = 0; 4814 4815 /* create and initialize a linked list */ 4816 nlnk = N+1; 4817 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4818 4819 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4820 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4821 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4822 4823 current_space = free_space; 4824 4825 /* determine symbolic info for each local row */ 4826 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4827 4828 for (k=0; k<merge->nrecv; k++) { 4829 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4830 nrows = *buf_ri_k[k]; 4831 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4832 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4833 } 4834 4835 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4836 len = 0; 4837 for (i=0; i<m; i++) { 4838 bnzi = 0; 4839 /* add local non-zero cols of this proc's seqmat into lnk */ 4840 arow = owners[rank] + i; 4841 anzi = ai[arow+1] - ai[arow]; 4842 aj = a->j + ai[arow]; 4843 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4844 bnzi += nlnk; 4845 /* add received col data into lnk */ 4846 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4847 if (i == *nextrow[k]) { /* i-th row */ 4848 anzi = *(nextai[k]+1) - *nextai[k]; 4849 aj = buf_rj[k] + *nextai[k]; 4850 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4851 bnzi += nlnk; 4852 nextrow[k]++; nextai[k]++; 4853 } 4854 } 4855 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4856 4857 /* if free space is not available, make more free space */ 4858 if (current_space->local_remaining<bnzi) { 4859 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4860 nspacedouble++; 4861 } 4862 /* copy data into free space, then initialize lnk */ 4863 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4864 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4865 4866 current_space->array += bnzi; 4867 current_space->local_used += bnzi; 4868 current_space->local_remaining -= bnzi; 4869 4870 bi[i+1] = bi[i] + bnzi; 4871 } 4872 4873 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4874 4875 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4876 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4877 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4878 4879 /* create symbolic parallel matrix B_mpi */ 4880 /*---------------------------------------*/ 4881 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4882 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4883 if (n==PETSC_DECIDE) { 4884 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4885 } else { 4886 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4887 } 4888 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4889 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4890 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4891 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4892 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4893 4894 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4895 B_mpi->assembled = PETSC_FALSE; 4896 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4897 merge->bi = bi; 4898 merge->bj = bj; 4899 merge->buf_ri = buf_ri; 4900 merge->buf_rj = buf_rj; 4901 merge->coi = NULL; 4902 merge->coj = NULL; 4903 merge->owners_co = NULL; 4904 4905 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4906 4907 /* attach the 
supporting struct to B_mpi for reuse */ 4908 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4909 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4910 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4911 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4912 *mpimat = B_mpi; 4913 4914 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4915 PetscFunctionReturn(0); 4916 } 4917 4918 /*@C 4919 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4920 matrices from each processor 4921 4922 Collective on MPI_Comm 4923 4924 Input Parameters: 4925 + comm - the communicators the parallel matrix will live on 4926 . seqmat - the input sequential matrices 4927 . m - number of local rows (or PETSC_DECIDE) 4928 . n - number of local columns (or PETSC_DECIDE) 4929 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4930 4931 Output Parameter: 4932 . mpimat - the parallel matrix generated 4933 4934 Level: advanced 4935 4936 Notes: 4937 The dimensions of the sequential matrix in each processor MUST be the same. 4938 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4939 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4940 @*/ 4941 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4942 { 4943 PetscErrorCode ierr; 4944 PetscMPIInt size; 4945 4946 PetscFunctionBegin; 4947 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4948 if (size == 1) { 4949 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4950 if (scall == MAT_INITIAL_MATRIX) { 4951 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4952 } else { 4953 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4954 } 4955 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4956 PetscFunctionReturn(0); 4957 } 4958 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4959 if (scall == MAT_INITIAL_MATRIX) { 4960 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4961 } 4962 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4963 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4964 PetscFunctionReturn(0); 4965 } 4966 4967 /*@ 4968 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4969 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4970 with MatGetSize() 4971 4972 Not Collective 4973 4974 Input Parameters: 4975 + A - the matrix 4976 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4977 4978 Output Parameter: 4979 . 
A_loc - the local sequential matrix generated 4980 4981 Level: developer 4982 4983 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4984 4985 @*/ 4986 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4987 { 4988 PetscErrorCode ierr; 4989 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4990 Mat_SeqAIJ *mat,*a,*b; 4991 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4992 MatScalar *aa,*ba,*cam; 4993 PetscScalar *ca; 4994 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4995 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4996 PetscBool match; 4997 MPI_Comm comm; 4998 PetscMPIInt size; 4999 5000 PetscFunctionBegin; 5001 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5002 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5003 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5004 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5005 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5006 5007 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5008 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5009 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5010 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5011 aa = a->a; ba = b->a; 5012 if (scall == MAT_INITIAL_MATRIX) { 5013 if (size == 1) { 5014 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5015 PetscFunctionReturn(0); 5016 } 5017 5018 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5019 ci[0] = 0; 5020 for (i=0; i<am; i++) { 5021 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5022 } 5023 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5024 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5025 k = 0; 5026 for (i=0; i<am; i++) { 5027 ncols_o = bi[i+1] - bi[i]; 5028 ncols_d = ai[i+1] - ai[i]; 5029 /* off-diagonal portion of A */ 5030 for (jo=0; jo<ncols_o; jo++) { 5031 col = cmap[*bj]; 5032 if (col >= cstart) break; 5033 cj[k] = col; bj++; 5034 ca[k++] = *ba++; 5035 } 5036 /* diagonal portion of A */ 5037 for (j=0; j<ncols_d; j++) { 5038 cj[k] = cstart + *aj++; 5039 ca[k++] = *aa++; 5040 } 5041 /* off-diagonal portion of A */ 5042 for (j=jo; j<ncols_o; j++) { 5043 cj[k] = cmap[*bj++]; 5044 ca[k++] = *ba++; 5045 } 5046 } 5047 /* put together the new matrix */ 5048 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5049 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5050 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5051 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5052 mat->free_a = PETSC_TRUE; 5053 mat->free_ij = PETSC_TRUE; 5054 mat->nonew = 0; 5055 } else if (scall == MAT_REUSE_MATRIX) { 5056 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5057 ci = mat->i; cj = mat->j; cam = mat->a; 5058 for (i=0; i<am; i++) { 5059 /* off-diagonal portion of A */ 5060 ncols_o = bi[i+1] - bi[i]; 5061 for (jo=0; jo<ncols_o; jo++) { 5062 col = cmap[*bj]; 5063 if (col >= cstart) break; 5064 *cam++ = *ba++; bj++; 5065 } 5066 /* diagonal portion of A */ 5067 ncols_d = ai[i+1] - ai[i]; 5068 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5069 /* off-diagonal portion of A */ 5070 for (j=jo; j<ncols_o; j++) { 5071 *cam++ = *ba++; bj++; 5072 } 5073 } 5074 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5075 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5076 PetscFunctionReturn(0); 5077 } 5078 5079 /*@C 5080 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5081 5082 Not Collective 5083 5084 Input Parameters: 5085 + A - the matrix 5086 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5087 - row, col - index sets of rows and columns to extract (or NULL) 5088 5089 Output Parameter: 5090 . A_loc - the local sequential matrix generated 5091 5092 Level: developer 5093 5094 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5095 5096 @*/ 5097 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5098 { 5099 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5100 PetscErrorCode ierr; 5101 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5102 IS isrowa,iscola; 5103 Mat *aloc; 5104 PetscBool match; 5105 5106 PetscFunctionBegin; 5107 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5108 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5109 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5110 if (!row) { 5111 start = A->rmap->rstart; end = A->rmap->rend; 5112 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5113 } else { 5114 isrowa = *row; 5115 } 5116 if (!col) { 5117 start = A->cmap->rstart; 5118 cmap = a->garray; 5119 nzA = a->A->cmap->n; 5120 nzB = a->B->cmap->n; 5121 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5122 ncols = 0; 5123 for (i=0; i<nzB; i++) { 5124 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5125 else break; 5126 } 5127 imark = i; 5128 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5129 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5130 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5131 } else { 5132 iscola = *col; 5133 } 5134 if (scall != MAT_INITIAL_MATRIX) { 5135 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5136 aloc[0] = *A_loc; 5137 } 5138 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5139 if (!col) { /* attach global id of condensed columns */ 5140 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5141 } 5142 *A_loc = aloc[0]; 5143 ierr = PetscFree(aloc);CHKERRQ(ierr); 5144 if (!row) { 5145 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5146 } 5147 if (!col) { 5148 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5149 } 5150 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5151 PetscFunctionReturn(0); 5152 } 5153 5154 /*@C 5155 MatGetBrowsOfAcols - Creates a 
SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5156 5157 Collective on Mat 5158 5159 Input Parameters: 5160 + A,B - the matrices in mpiaij format 5161 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5162 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5163 5164 Output Parameter: 5165 + rowb, colb - index sets of rows and columns of B to extract 5166 - B_seq - the sequential matrix generated 5167 5168 Level: developer 5169 5170 @*/ 5171 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5172 { 5173 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5174 PetscErrorCode ierr; 5175 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5176 IS isrowb,iscolb; 5177 Mat *bseq=NULL; 5178 5179 PetscFunctionBegin; 5180 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5181 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5182 } 5183 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5184 5185 if (scall == MAT_INITIAL_MATRIX) { 5186 start = A->cmap->rstart; 5187 cmap = a->garray; 5188 nzA = a->A->cmap->n; 5189 nzB = a->B->cmap->n; 5190 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5191 ncols = 0; 5192 for (i=0; i<nzB; i++) { /* row < local row index */ 5193 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5194 else break; 5195 } 5196 imark = i; 5197 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5198 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5199 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5200 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5201 } else { 5202 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5203 isrowb = *rowb; iscolb = *colb; 5204 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5205 bseq[0] = *B_seq; 5206 } 5207 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5208 *B_seq = bseq[0]; 5209 ierr = PetscFree(bseq);CHKERRQ(ierr); 5210 if (!rowb) { 5211 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5212 } else { 5213 *rowb = isrowb; 5214 } 5215 if (!colb) { 5216 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5217 } else { 5218 *colb = iscolb; 5219 } 5220 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5221 PetscFunctionReturn(0); 5222 } 5223 5224 /* 5225 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5226 of the OFF-DIAGONAL portion of local A 5227 5228 Collective on Mat 5229 5230 Input Parameters: 5231 + A,B - the matrices in mpiaij format 5232 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5233 5234 Output Parameter: 5235 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5236 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5237 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5238 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5239 5240 Level: developer 5241 5242 */ 5243 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5244 { 5245 VecScatter_MPI_General *gen_to,*gen_from; 5246 PetscErrorCode ierr; 5247 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5248 Mat_SeqAIJ *b_oth; 5249 VecScatter ctx; 5250 MPI_Comm comm; 5251 PetscMPIInt *rprocs,*sprocs,tag,rank; 5252 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5253 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5254 PetscScalar *b_otha,*bufa,*bufA,*vals; 5255 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5256 MPI_Request *rwaits = NULL,*swaits = NULL; 5257 MPI_Status *sstatus,rstatus; 5258 PetscMPIInt jj,size; 5259 VecScatterType type; 5260 PetscBool mpi1; 5261 5262 PetscFunctionBegin; 5263 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5264 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5265 5266 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5267 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5268 } 5269 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5270 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5271 5272 if (size == 1) { 5273 startsj_s = NULL; 5274 bufa_ptr = NULL; 5275 *B_oth = NULL; 5276 PetscFunctionReturn(0); 5277 } 5278 5279 ctx = a->Mvctx; 5280 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5281 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5282 if (!mpi1) { 5283 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5284 thus create a->Mvctx_mpi1 */ 5285 if (!a->Mvctx_mpi1) { 5286 a->Mvctx_mpi1_flg = PETSC_TRUE; 5287 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5288 } 5289 ctx = a->Mvctx_mpi1; 5290 } 5291 tag = ((PetscObject)ctx)->tag; 5292 5293 gen_to = (VecScatter_MPI_General*)ctx->todata; 5294 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5295 nrecvs = gen_from->n; 5296 nsends = gen_to->n; 5297 5298 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5299 srow = gen_to->indices; /* local row index to be sent */ 5300 sstarts = gen_to->starts; 5301 sprocs = gen_to->procs; 5302 sstatus = gen_to->sstatus; 5303 sbs = gen_to->bs; 5304 rstarts = gen_from->starts; 5305 rprocs = gen_from->procs; 5306 rbs = gen_from->bs; 5307 5308 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5309 if (scall == MAT_INITIAL_MATRIX) { 5310 /* i-array */ 5311 /*---------*/ 5312 /* post receives */ 5313 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5314 for (i=0; i<nrecvs; i++) { 5315 rowlen = rvalues + rstarts[i]*rbs; 5316 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5317 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5318 } 5319 5320 /* pack the outgoing message */ 5321 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5322 5323 sstartsj[0] = 0; 5324 rstartsj[0] = 0; 5325 len = 0; /* total length of j or a array to be sent */ 5326 k = 0; 5327 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5328 for (i=0; i<nsends; i++) { 5329 rowlen = svalues + sstarts[i]*sbs; 5330 
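      /* rowlen points into the send buffer (svalues) where the row lengths of the i-th outgoing message are packed */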
nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5331 for (j=0; j<nrows; j++) { 5332 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5333 for (l=0; l<sbs; l++) { 5334 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5335 5336 rowlen[j*sbs+l] = ncols; 5337 5338 len += ncols; 5339 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5340 } 5341 k++; 5342 } 5343 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5344 5345 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5346 } 5347 /* recvs and sends of i-array are completed */ 5348 i = nrecvs; 5349 while (i--) { 5350 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5351 } 5352 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5353 ierr = PetscFree(svalues);CHKERRQ(ierr); 5354 5355 /* allocate buffers for sending j and a arrays */ 5356 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5357 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5358 5359 /* create i-array of B_oth */ 5360 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5361 5362 b_othi[0] = 0; 5363 len = 0; /* total length of j or a array to be received */ 5364 k = 0; 5365 for (i=0; i<nrecvs; i++) { 5366 rowlen = rvalues + rstarts[i]*rbs; 5367 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5368 for (j=0; j<nrows; j++) { 5369 b_othi[k+1] = b_othi[k] + rowlen[j]; 5370 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5371 k++; 5372 } 5373 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5374 } 5375 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5376 5377 /* allocate space for j and a arrrays of B_oth */ 5378 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5379 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5380 5381 /* j-array */ 5382 /*---------*/ 5383 /* post receives of j-array */ 5384 for (i=0; i<nrecvs; i++) { 5385 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5386 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5387 } 5388 5389 /* pack the outgoing message j-array */ 5390 k = 0; 5391 for (i=0; i<nsends; i++) { 5392 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5393 bufJ = bufj+sstartsj[i]; 5394 for (j=0; j<nrows; j++) { 5395 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5396 for (ll=0; ll<sbs; ll++) { 5397 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5398 for (l=0; l<ncols; l++) { 5399 *bufJ++ = cols[l]; 5400 } 5401 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5402 } 5403 } 5404 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5405 } 5406 5407 /* recvs and sends of j-array are completed */ 5408 i = nrecvs; 5409 while (i--) { 5410 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5411 } 5412 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5413 } else if (scall == MAT_REUSE_MATRIX) { 5414 sstartsj = *startsj_s; 5415 rstartsj = *startsj_r; 5416 bufa = *bufa_ptr; 5417 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5418 b_otha = b_oth->a; 5419 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5420 5421 /* a-array */ 5422 /*---------*/ 5423 /* post receives of a-array */ 5424 for (i=0; i<nrecvs; i++) { 5425 nrows = rstartsj[i+1]-rstartsj[i]; /* length 
of the msg received */ 5426 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5427 } 5428 5429 /* pack the outgoing message a-array */ 5430 k = 0; 5431 for (i=0; i<nsends; i++) { 5432 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5433 bufA = bufa+sstartsj[i]; 5434 for (j=0; j<nrows; j++) { 5435 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5436 for (ll=0; ll<sbs; ll++) { 5437 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5438 for (l=0; l<ncols; l++) { 5439 *bufA++ = vals[l]; 5440 } 5441 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5442 } 5443 } 5444 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5445 } 5446 /* recvs and sends of a-array are completed */ 5447 i = nrecvs; 5448 while (i--) { 5449 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5450 } 5451 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5452 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5453 5454 if (scall == MAT_INITIAL_MATRIX) { 5455 /* put together the new matrix */ 5456 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5457 5458 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5459 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5460 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5461 b_oth->free_a = PETSC_TRUE; 5462 b_oth->free_ij = PETSC_TRUE; 5463 b_oth->nonew = 0; 5464 5465 ierr = PetscFree(bufj);CHKERRQ(ierr); 5466 if (!startsj_s || !bufa_ptr) { 5467 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5468 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5469 } else { 5470 *startsj_s = sstartsj; 5471 *startsj_r = rstartsj; 5472 *bufa_ptr = bufa; 5473 } 5474 } 5475 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5476 PetscFunctionReturn(0); 5477 } 5478 5479 /*@C 5480 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5481 5482 Not Collective 5483 5484 Input Parameters: 5485 . A - The matrix in mpiaij format 5486 5487 Output Parameter: 5488 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5489 . 
colmap - A map from global column index to local index into lvec 5490 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5491 5492 Level: developer 5493 5494 @*/ 5495 #if defined(PETSC_USE_CTABLE) 5496 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5497 #else 5498 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5499 #endif 5500 { 5501 Mat_MPIAIJ *a; 5502 5503 PetscFunctionBegin; 5504 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5505 PetscValidPointer(lvec, 2); 5506 PetscValidPointer(colmap, 3); 5507 PetscValidPointer(multScatter, 4); 5508 a = (Mat_MPIAIJ*) A->data; 5509 if (lvec) *lvec = a->lvec; 5510 if (colmap) *colmap = a->colmap; 5511 if (multScatter) *multScatter = a->Mvctx; 5512 PetscFunctionReturn(0); 5513 } 5514 5515 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5516 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5517 #if defined(PETSC_HAVE_MKL_SPARSE) 5518 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5519 #endif 5520 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5521 #if defined(PETSC_HAVE_ELEMENTAL) 5522 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5523 #endif 5524 #if defined(PETSC_HAVE_HYPRE) 5525 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5526 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5527 #endif 5528 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5529 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5530 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5531 5532 /* 5533 Computes (B'*A')' since computing B*A directly is untenable 5534 5535 n p p 5536 ( ) ( ) ( ) 5537 m ( A ) * n ( B ) = m ( C ) 5538 ( ) ( ) ( ) 5539 5540 */ 5541 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5542 { 5543 PetscErrorCode ierr; 5544 Mat At,Bt,Ct; 5545 5546 PetscFunctionBegin; 5547 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5548 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5549 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5550 ierr = MatDestroy(&At);CHKERRQ(ierr); 5551 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5552 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5553 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5554 PetscFunctionReturn(0); 5555 } 5556 5557 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5558 { 5559 PetscErrorCode ierr; 5560 PetscInt m=A->rmap->n,n=B->cmap->n; 5561 Mat Cmat; 5562 5563 PetscFunctionBegin; 5564 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5565 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5566 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5567 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5568 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5569 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5570 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5571 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5572 5573 Cmat->ops->matmultnumeric = 
MatMatMultNumeric_MPIDense_MPIAIJ; 5574 5575 *C = Cmat; 5576 PetscFunctionReturn(0); 5577 } 5578 5579 /* ----------------------------------------------------------------*/ 5580 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5581 { 5582 PetscErrorCode ierr; 5583 5584 PetscFunctionBegin; 5585 if (scall == MAT_INITIAL_MATRIX) { 5586 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5587 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5588 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5589 } 5590 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5591 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5592 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5593 PetscFunctionReturn(0); 5594 } 5595 5596 /*MC 5597 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5598 5599 Options Database Keys: 5600 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5601 5602 Level: beginner 5603 5604 .seealso: MatCreateAIJ() 5605 M*/ 5606 5607 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5608 { 5609 Mat_MPIAIJ *b; 5610 PetscErrorCode ierr; 5611 PetscMPIInt size; 5612 5613 PetscFunctionBegin; 5614 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5615 5616 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5617 B->data = (void*)b; 5618 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5619 B->assembled = PETSC_FALSE; 5620 B->insertmode = NOT_SET_VALUES; 5621 b->size = size; 5622 5623 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5624 5625 /* build cache for off array entries formed */ 5626 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5627 5628 b->donotstash = PETSC_FALSE; 5629 b->colmap = 0; 5630 b->garray = 0; 5631 b->roworiented = PETSC_TRUE; 5632 5633 /* stuff used for matrix vector multiply */ 5634 b->lvec = NULL; 5635 b->Mvctx = NULL; 5636 5637 /* stuff for MatGetRow() */ 5638 b->rowindices = 0; 5639 b->rowvalues = 0; 5640 b->getrowactive = PETSC_FALSE; 5641 5642 /* flexible pointer used in CUSP/CUSPARSE classes */ 5643 b->spptr = NULL; 5644 5645 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5646 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5647 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5648 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5649 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5650 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5651 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5652 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5653 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5654 #if defined(PETSC_HAVE_MKL_SPARSE) 5655 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5656 #endif 5657 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5658 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5659 #if defined(PETSC_HAVE_ELEMENTAL) 5660 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5661 #endif 5662 #if defined(PETSC_HAVE_HYPRE) 5663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5664 #endif 5665 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5668 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5670 #if defined(PETSC_HAVE_HYPRE) 5671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5672 #endif 5673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5674 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5675 PetscFunctionReturn(0); 5676 } 5677 5678 /*@C 5679 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5680 and "off-diagonal" part of the matrix in CSR format. 5681 5682 Collective on MPI_Comm 5683 5684 Input Parameters: 5685 + comm - MPI communicator 5686 . m - number of local rows (Cannot be PETSC_DECIDE) 5687 . n - This value should be the same as the local size used in creating the 5688 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5689 calculated if N is given) For square matrices n is almost always m. 5690 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5691 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5692 . i - row indices for "diagonal" portion of matrix 5693 . j - column indices 5694 . a - matrix values 5695 . oi - row indices for "off-diagonal" portion of matrix 5696 . oj - column indices 5697 - oa - matrix values 5698 5699 Output Parameter: 5700 . mat - the matrix 5701 5702 Level: advanced 5703 5704 Notes: 5705 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5706 must free the arrays once the matrix has been destroyed and not before. 5707 5708 The i and j indices are 0 based 5709 5710 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5711 5712 This sets local rows and cannot be used to set off-processor values. 5713 5714 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5715 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5716 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 5717 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5718 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5719 communication if it is known that only local entries will be set. 5720 5721 .keywords: matrix, aij, compressed row, sparse, parallel 5722 5723 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5724 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5725 @*/ 5726 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5727 { 5728 PetscErrorCode ierr; 5729 Mat_MPIAIJ *maij; 5730 5731 PetscFunctionBegin; 5732 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5733 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5734 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5735 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5736 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5737 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5738 maij = (Mat_MPIAIJ*) (*mat)->data; 5739 5740 (*mat)->preallocated = PETSC_TRUE; 5741 5742 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5743 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5744 5745 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5746 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5747 5748 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5749 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5750 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5751 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5752 5753 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5754 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5755 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5756 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5757 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5758 PetscFunctionReturn(0); 5759 } 5760 5761 /* 5762 Special version for direct calls from Fortran 5763 */ 5764 #include <petsc/private/fortranimpl.h> 5765 5766 /* Change these macros so can be used in void function */ 5767 #undef CHKERRQ 5768 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5769 #undef SETERRQ2 5770 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5771 #undef SETERRQ3 5772 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5773 #undef SETERRQ 5774 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5775 5776 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5777 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5778 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5779 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5780 #else 5781 #endif 5782 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5783 { 5784 Mat mat = *mmat; 5785 PetscInt m = *mm, n = *mn; 5786 InsertMode addv = 
/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required by the macros below */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   ba must be refreshed before ap2 is recomputed because MatDisAssemble_MPIAIJ() replaces B's storage */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}

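/*
   Illustrative sketch: this entry point is normally reached from the Fortran MatSetValues() interface, but
   since it is PETSC_EXTERN it can also be exercised directly from C, e.g. in a test harness. The matrix A
   below is a placeholder that must already be of type MATMPIAIJ, preallocated, and the global row index used
   must be owned by the calling process:

     PetscInt       nrows = 1, ncols = 2, rowidx[1] = {0}, colidx[2] = {0,1};
     PetscScalar    vals[2] = {1.0, 2.0};
     InsertMode     mode = INSERT_VALUES;
     PetscErrorCode ierr = 0;

     matsetvaluesmpiaij_(&A,&nrows,rowidx,&ncols,colidx,vals,&mode,&ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRABORT(PETSC_COMM_WORLD,ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRABORT(PETSC_COMM_WORLD,ierr);

   Note that, given the CHKERRABORT redefinitions above, errors inside this routine abort rather than being
   reported through the trailing ierr argument.
*/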