#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
   the type also automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
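/*
   Example (a minimal sketch of the recommended pattern from the notes above): call both
   preallocation routines so the same code runs with one or with many MPI processes. The
   global size N and the per-row estimates (5 diagonal, 2 off-diagonal nonzeros) are
   placeholders, and error checking (ierr/CHKERRQ) is omitted for brevity.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);
     MatSetType(A,MATAIJ);
     MatSetFromOptions(A);
     MatSeqAIJSetPreallocation(A,5,NULL);           used when the communicator has a single process
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);    used when it has more than one
     ... MatSetValues(A,...) ...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/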
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr
= ISGetSize(sis,&nsis);CHKERRQ(ierr); 212 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 213 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 214 215 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 216 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 217 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 218 n = ngis + nsis; 219 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 220 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 221 for (i=0; i<n; i++) iis[i] += rstart; 222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 223 224 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 225 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 226 ierr = ISDestroy(&sis);CHKERRQ(ierr); 227 ierr = ISDestroy(&gis);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 /* 232 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 233 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 234 235 Only for square matrices 236 237 Used by a preconditioner, hence PETSC_EXTERN 238 */ 239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 240 { 241 PetscMPIInt rank,size; 242 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 243 PetscErrorCode ierr; 244 Mat mat; 245 Mat_SeqAIJ *gmata; 246 PetscMPIInt tag; 247 MPI_Status status; 248 PetscBool aij; 249 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 254 if (!rank) { 255 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 256 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 257 } 258 if (reuse == MAT_INITIAL_MATRIX) { 259 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 260 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 261 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 262 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 263 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 264 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 265 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 266 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 267 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 268 269 rowners[0] = 0; 270 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 271 rstart = rowners[rank]; 272 rend = rowners[rank+1]; 273 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 274 if (!rank) { 275 gmata = (Mat_SeqAIJ*) gmat->data; 276 /* send row lengths to all processors */ 277 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 278 for (i=1; i<size; i++) { 279 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 280 } 281 /* determine number diagonal and off-diagonal counts */ 282 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 283 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 284 jj = 0; 285 for (i=0; i<m; i++) { 286 for (j=0; j<dlens[i]; j++) { 287 if (gmata->j[jj] < rstart) ld[i]++; 288 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 289 jj++; 290 } 291 } 292 /* send column indices to other processes */ 293 for (i=1; i<size; i++) { 294 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 295 ierr = 
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 296 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 } 298 299 /* send numerical values to other processes */ 300 for (i=1; i<size; i++) { 301 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 302 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 303 } 304 gmataa = gmata->a; 305 gmataj = gmata->j; 306 307 } else { 308 /* receive row lengths */ 309 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* receive column indices */ 311 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 312 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 313 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 314 /* determine number diagonal and off-diagonal counts */ 315 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 316 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 317 jj = 0; 318 for (i=0; i<m; i++) { 319 for (j=0; j<dlens[i]; j++) { 320 if (gmataj[jj] < rstart) ld[i]++; 321 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 322 jj++; 323 } 324 } 325 /* receive numerical values */ 326 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 327 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 328 } 329 /* set preallocation */ 330 for (i=0; i<m; i++) { 331 dlens[i] -= olens[i]; 332 } 333 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 334 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 335 336 for (i=0; i<m; i++) { 337 dlens[i] += olens[i]; 338 } 339 cnt = 0; 340 for (i=0; i<m; i++) { 341 row = rstart + i; 342 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 343 cnt += dlens[i]; 344 } 345 if (rank) { 346 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 347 } 348 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 349 ierr = PetscFree(rowners);CHKERRQ(ierr); 350 351 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 352 353 *inmat = mat; 354 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 355 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 356 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 357 mat = *inmat; 358 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 359 if (!rank) { 360 /* send numerical values to other processes */ 361 gmata = (Mat_SeqAIJ*) gmat->data; 362 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 363 gmataa = gmata->a; 364 for (i=1; i<size; i++) { 365 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 366 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 367 } 368 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 369 } else { 370 /* receive numerical values from process 0*/ 371 nz = Ad->nz + Ao->nz; 372 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 373 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 374 } 375 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 376 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 377 ad = Ad->a; 378 ao = Ao->a; 379 if (mat->rmap->n) { 380 i = 0; 381 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 for (i=1; i<mat->rmap->n; i++) { 
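      /* Copy the off-diagonal entries that lie to the right of the diagonal block in row i-1,
         followed by those to the left of the diagonal block in row i, then the diagonal-block
         entries of row i (the received values stream is ordered row by row as
         left off-diagonal | diagonal | right off-diagonal). */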
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  processor has an order-N integer array, but access is fast).
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i]
= value; \ 479 goto b_noinsert; \ 480 } \ 481 } \ 482 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 484 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 485 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 486 N = nrow2++ - 1; b->nz++; high2++; \ 487 /* shift up all the later entries in this row */ \ 488 for (ii=N; ii>=_i; ii--) { \ 489 rp2[ii+1] = rp2[ii]; \ 490 ap2[ii+1] = ap2[ii]; \ 491 } \ 492 rp2[_i] = col; \ 493 ap2[_i] = value; \ 494 B->nonzerostate++; \ 495 b_noinsert: ; \ 496 bilen[row] = nrow2; \ 497 } 498 499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 500 { 501 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 502 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 503 PetscErrorCode ierr; 504 PetscInt l,*garray = mat->garray,diag; 505 506 PetscFunctionBegin; 507 /* code only works for square matrices A */ 508 509 /* find size of row to the left of the diagonal part */ 510 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 511 row = row - diag; 512 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 513 if (garray[b->j[b->i[row]+l]] > diag) break; 514 } 515 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* diagonal part */ 518 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 519 520 /* right of diagonal part */ 521 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 526 { 527 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 528 PetscScalar value; 529 PetscErrorCode ierr; 530 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 531 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 532 PetscBool roworiented = aij->roworiented; 533 534 /* Some Variables required in the macro */ 535 Mat A = aij->A; 536 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 537 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 538 MatScalar *aa = a->a; 539 PetscBool ignorezeroentries = a->ignorezeroentries; 540 Mat B = aij->B; 541 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 542 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 543 MatScalar *ba = b->a; 544 545 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 546 PetscInt nonew; 547 MatScalar *ap1,*ap2; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (im[i] < 0) continue; 552 #if defined(PETSC_USE_DEBUG) 553 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 554 #endif 555 if (im[i] >= rstart && im[i] < rend) { 556 row = im[i] - rstart; 557 lastcol1 = -1; 558 rp1 = aj + ai[row]; 559 ap1 = aa + ai[row]; 560 rmax1 = aimax[row]; 561 nrow1 = ailen[row]; 562 low1 = 0; 563 high1 = nrow1; 564 lastcol2 = -1; 565 rp2 = bj + bi[row]; 566 ap2 = ba + bi[row]; 567 rmax2 = bimax[row]; 568 nrow2 = bilen[row]; 569 low2 = 0; 570 high2 = nrow2; 571 572 for (j=0; j<n; j++) { 573 if (roworiented) value = v[i*n+j]; 574 else value = v[i+j*m]; 
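        /* Columns owned by this process (cstart <= column < cend) go into the diagonal block aij->A;
           any other column is routed to the off-diagonal block aij->B, translated to a local column
           index through aij->colmap once the matrix has been assembled. */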
575 if (in[j] >= cstart && in[j] < cend) { 576 col = in[j] - cstart; 577 nonew = a->nonew; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 579 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 580 } else if (in[j] < 0) continue; 581 #if defined(PETSC_USE_DEBUG) 582 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 583 #endif 584 else { 585 if (mat->was_assembled) { 586 if (!aij->colmap) { 587 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 #if defined(PETSC_USE_CTABLE) 590 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 591 col--; 592 #else 593 col = aij->colmap[in[j]] - 1; 594 #endif 595 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 596 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 597 col = in[j]; 598 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 599 B = aij->B; 600 b = (Mat_SeqAIJ*)B->data; 601 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 602 rp2 = bj + bi[row]; 603 ap2 = ba + bi[row]; 604 rmax2 = bimax[row]; 605 nrow2 = bilen[row]; 606 low2 = 0; 607 high2 = nrow2; 608 bm = aij->B->rmap->n; 609 ba = b->a; 610 } else if (col < 0) { 611 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 612 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 613 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } 615 } else col = in[j]; 616 nonew = b->nonew; 617 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 618 } 619 } 620 } else { 621 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 622 if (!aij->donotstash) { 623 mat->assembled = PETSC_FALSE; 624 if (roworiented) { 625 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 626 } else { 627 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 628 } 629 } 630 } 631 } 632 PetscFunctionReturn(0); 633 } 634 635 /* 636 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 637 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 638 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
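  For example, with ownership range cstart=4, cend=8, a row whose sorted global columns are {1,4,6,9}
  stores local columns {0,2} in the diagonal block's j array and the global columns {1,9} in the
  off-diagonal block's j array, so ilen becomes 2 for both blocks in that row.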
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij    = (Mat_MPIAIJ*)mat->data;
  Mat        A       = aij->A; /* diagonal part of the matrix */
  Mat        B       = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b      = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart  = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen  = a->ilen,*aj = a->j;
  PetscInt   *bilen  = b->ilen,*bj = b->j;
  PetscInt   am      = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ*)mat->data;
  Mat        A      = aij->A; /* diagonal part of the matrix */
  Mat        B      = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point.
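     That is, full_diag_i[j] and full_offd_i[j] are the starting offsets of row j within the preallocated
     aj/aa and bj/ba arrays, so the locally available entries are written at those offsets even though
     off-process contributions may fill the remainder of the row later.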
*/ 695 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 696 PetscScalar *aa = a->a,*ba = b->a; 697 698 PetscFunctionBegin; 699 /* Iterate over all rows of the matrix */ 700 for (j=0; j<am; j++) { 701 dnz_row = onz_row = 0; 702 rowstart_offd = full_offd_i[j]; 703 rowstart_diag = full_diag_i[j]; 704 /* Iterate over all non-zero columns of the current row */ 705 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 706 /* If column is in the diagonal */ 707 if (mat_j[col] >= cstart && mat_j[col] < cend) { 708 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 709 aa[rowstart_diag+dnz_row] = mat_a[col]; 710 dnz_row++; 711 } else { /* off-diagonal entries */ 712 bj[rowstart_offd+onz_row] = mat_j[col]; 713 ba[rowstart_offd+onz_row] = mat_a[col]; 714 onz_row++; 715 } 716 } 717 ailen[j] = dnz_row; 718 bilen[j] = onz_row; 719 } 720 PetscFunctionReturn(0); 721 } 722 723 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 724 { 725 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 726 PetscErrorCode ierr; 727 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 728 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 729 730 PetscFunctionBegin; 731 for (i=0; i<m; i++) { 732 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 733 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 734 if (idxm[i] >= rstart && idxm[i] < rend) { 735 row = idxm[i] - rstart; 736 for (j=0; j<n; j++) { 737 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 738 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 739 if (idxn[j] >= cstart && idxn[j] < cend) { 740 col = idxn[j] - cstart; 741 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 742 } else { 743 if (!aij->colmap) { 744 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 745 } 746 #if defined(PETSC_USE_CTABLE) 747 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 748 col--; 749 #else 750 col = aij->colmap[idxn[j]] - 1; 751 #endif 752 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 753 else { 754 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 755 } 756 } 757 } 758 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 759 } 760 PetscFunctionReturn(0); 761 } 762 763 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 764 765 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 766 { 767 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 768 PetscErrorCode ierr; 769 PetscInt nstash,reallocs; 770 771 PetscFunctionBegin; 772 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 773 774 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 775 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 776 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 777 PetscFunctionReturn(0); 778 } 779 780 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 781 { 782 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 783 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 784 PetscErrorCode ierr; 785 PetscMPIInt n; 786 PetscInt i,j,rstart,ncols,flg; 787 PetscInt *row,*col; 788 
PetscBool other_disassembled; 789 PetscScalar *val; 790 791 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 792 793 PetscFunctionBegin; 794 if (!aij->donotstash && !mat->nooffprocentries) { 795 while (1) { 796 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 797 if (!flg) break; 798 799 for (i=0; i<n; ) { 800 /* Now identify the consecutive vals belonging to the same row */ 801 for (j=i,rstart=row[j]; j<n; j++) { 802 if (row[j] != rstart) break; 803 } 804 if (j < n) ncols = j-i; 805 else ncols = n-i; 806 /* Now assemble all these values with a single function call */ 807 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 808 809 i = j; 810 } 811 } 812 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 813 } 814 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 815 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 816 817 /* determine if any processor has disassembled, if so we must 818 also disassemble ourselfs, in order that we may reassemble. */ 819 /* 820 if nonzero structure of submatrix B cannot change then we know that 821 no processor disassembled thus we can skip this stuff 822 */ 823 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 824 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 825 if (mat->was_assembled && !other_disassembled) { 826 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 827 } 828 } 829 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 830 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 831 } 832 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 833 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 834 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 835 836 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 837 838 aij->rowvalues = 0; 839 840 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 841 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 842 843 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 844 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 845 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 846 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 847 } 848 PetscFunctionReturn(0); 849 } 850 851 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 852 { 853 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 854 PetscErrorCode ierr; 855 856 PetscFunctionBegin; 857 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 858 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 863 { 864 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 865 PetscInt *lrows; 866 PetscInt r, len; 867 PetscBool cong; 868 PetscErrorCode ierr; 869 870 PetscFunctionBegin; 871 /* get locally owned rows */ 872 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 873 /* fix right hand side if needed */ 874 if (x && b) { 875 const PetscScalar *xx; 876 PetscScalar *bb; 877 878 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 879 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 880 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 881 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 882 ierr = 
VecRestoreArray(b, &bb);CHKERRQ(ierr); 883 } 884 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 885 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 886 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 887 if ((diag != 0.0) && cong) { 888 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 889 } else if (diag != 0.0) { 890 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 891 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 892 for (r = 0; r < len; ++r) { 893 const PetscInt row = lrows[r] + A->rmap->rstart; 894 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 895 } 896 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 897 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 898 } else { 899 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 900 } 901 ierr = PetscFree(lrows);CHKERRQ(ierr); 902 903 /* only change matrix nonzero state if pattern was allowed to be changed */ 904 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 905 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 906 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 907 } 908 PetscFunctionReturn(0); 909 } 910 911 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 912 { 913 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 914 PetscErrorCode ierr; 915 PetscMPIInt n = A->rmap->n; 916 PetscInt i,j,r,m,p = 0,len = 0; 917 PetscInt *lrows,*owners = A->rmap->range; 918 PetscSFNode *rrows; 919 PetscSF sf; 920 const PetscScalar *xx; 921 PetscScalar *bb,*mask; 922 Vec xmask,lmask; 923 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 924 const PetscInt *aj, *ii,*ridx; 925 PetscScalar *aa; 926 927 PetscFunctionBegin; 928 /* Create SF where leaves are input rows and roots are owned rows */ 929 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 930 for (r = 0; r < n; ++r) lrows[r] = -1; 931 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 932 for (r = 0; r < N; ++r) { 933 const PetscInt idx = rows[r]; 934 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 935 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 936 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 937 } 938 rrows[r].rank = p; 939 rrows[r].index = rows[r] - owners[p]; 940 } 941 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 942 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 943 /* Collect flags for rows to be zeroed */ 944 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 945 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 946 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 947 /* Compress and put in row numbers */ 948 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 949 /* zero diagonal part of matrix */ 950 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 951 /* handle off diagonal part of matrix */ 952 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 953 ierr 
= VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 954 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 955 for (i=0; i<len; i++) bb[lrows[i]] = 1; 956 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 957 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 958 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 960 if (x) { 961 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 962 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 964 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 965 } 966 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 967 /* remove zeroed rows of off diagonal matrix */ 968 ii = aij->i; 969 for (i=0; i<len; i++) { 970 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 971 } 972 /* loop over all elements of off process part of matrix zeroing removed columns*/ 973 if (aij->compressedrow.use) { 974 m = aij->compressedrow.nrows; 975 ii = aij->compressedrow.i; 976 ridx = aij->compressedrow.rindex; 977 for (i=0; i<m; i++) { 978 n = ii[i+1] - ii[i]; 979 aj = aij->j + ii[i]; 980 aa = aij->a + ii[i]; 981 982 for (j=0; j<n; j++) { 983 if (PetscAbsScalar(mask[*aj])) { 984 if (b) bb[*ridx] -= *aa*xx[*aj]; 985 *aa = 0.0; 986 } 987 aa++; 988 aj++; 989 } 990 ridx++; 991 } 992 } else { /* do not use compressed row format */ 993 m = l->B->rmap->n; 994 for (i=0; i<m; i++) { 995 n = ii[i+1] - ii[i]; 996 aj = aij->j + ii[i]; 997 aa = aij->a + ii[i]; 998 for (j=0; j<n; j++) { 999 if (PetscAbsScalar(mask[*aj])) { 1000 if (b) bb[i] -= *aa*xx[*aj]; 1001 *aa = 0.0; 1002 } 1003 aa++; 1004 aj++; 1005 } 1006 } 1007 } 1008 if (x) { 1009 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1010 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1011 } 1012 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1013 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1014 ierr = PetscFree(lrows);CHKERRQ(ierr); 1015 1016 /* only change matrix nonzero state if pattern was allowed to be changed */ 1017 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1018 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1019 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1020 } 1021 PetscFunctionReturn(0); 1022 } 1023 1024 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1025 { 1026 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1027 PetscErrorCode ierr; 1028 PetscInt nt; 1029 VecScatter Mvctx = a->Mvctx; 1030 1031 PetscFunctionBegin; 1032 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1033 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1034 1035 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1037 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1038 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1039 PetscFunctionReturn(0); 1040 } 1041 1042 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1043 { 1044 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1045 PetscErrorCode ierr; 1046 1047 PetscFunctionBegin; 1048 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatMultAdd_MPIAIJ(Mat 
A,Vec xx,Vec yy,Vec zz) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1055 PetscErrorCode ierr; 1056 VecScatter Mvctx = a->Mvctx; 1057 1058 PetscFunctionBegin; 1059 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1060 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1062 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1063 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1068 { 1069 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1070 PetscErrorCode ierr; 1071 PetscBool merged; 1072 1073 PetscFunctionBegin; 1074 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1075 /* do nondiagonal part */ 1076 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1077 if (!merged) { 1078 /* send it on its way */ 1079 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1080 /* do local part */ 1081 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1082 /* receive remote parts: note this assumes the values are not actually */ 1083 /* added in yy until the next line, */ 1084 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1085 } else { 1086 /* do local part */ 1087 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1088 /* send it on its way */ 1089 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1090 /* values actually were received in the Begin() but we need to call this nop */ 1091 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1092 } 1093 PetscFunctionReturn(0); 1094 } 1095 1096 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1097 { 1098 MPI_Comm comm; 1099 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1100 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1101 IS Me,Notme; 1102 PetscErrorCode ierr; 1103 PetscInt M,N,first,last,*notme,i; 1104 PetscBool lf; 1105 PetscMPIInt size; 1106 1107 PetscFunctionBegin; 1108 /* Easy test: symmetric diagonal block */ 1109 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1110 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1111 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1112 if (!*f) PetscFunctionReturn(0); 1113 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1114 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1115 if (size == 1) PetscFunctionReturn(0); 1116 1117 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
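     Each process extracts its off-diagonal block A(me,notme) and the corresponding block B(notme,me)
     with MatCreateSubMatrices() and then checks that the two are transposes of each other.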
*/ 1118 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1119 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1120 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1121 for (i=0; i<first; i++) notme[i] = i; 1122 for (i=last; i<M; i++) notme[i-last+first] = i; 1123 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1124 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1125 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1126 Aoff = Aoffs[0]; 1127 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1128 Boff = Boffs[0]; 1129 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1130 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1131 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1132 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1133 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1134 ierr = PetscFree(notme);CHKERRQ(ierr); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1139 { 1140 PetscErrorCode ierr; 1141 1142 PetscFunctionBegin; 1143 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 1152 PetscFunctionBegin; 1153 /* do nondiagonal part */ 1154 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1155 /* send it on its way */ 1156 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1157 /* do local part */ 1158 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1159 /* receive remote parts */ 1160 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 /* 1165 This only works correctly for square matrices where the subblock A->A is the 1166 diagonal block 1167 */ 1168 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1169 { 1170 PetscErrorCode ierr; 1171 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1172 1173 PetscFunctionBegin; 1174 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1175 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1176 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1181 { 1182 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1183 PetscErrorCode ierr; 1184 1185 PetscFunctionBegin; 1186 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1187 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1192 { 1193 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1194 PetscErrorCode ierr; 1195 1196 PetscFunctionBegin; 1197 #if defined(PETSC_USE_LOG) 1198 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1199 #endif 1200 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1201 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1202 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1203 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1204 #if defined(PETSC_USE_CTABLE) 1205 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1206 #else 1207 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1208 #endif 1209 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1210 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1211 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1212 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1213 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1214 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1215 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1216 1217 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1226 #if defined(PETSC_HAVE_ELEMENTAL) 1227 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1228 #endif 1229 #if defined(PETSC_HAVE_HYPRE) 1230 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1232 #endif 1233 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1239 { 1240 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1241 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1242 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1243 PetscErrorCode ierr; 1244 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1245 int fd; 1246 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1247 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1248 PetscScalar *column_values; 1249 PetscInt message_count,flowcontrolcount; 1250 FILE *file; 1251 1252 PetscFunctionBegin; 1253 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1254 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1255 nz = A->nz + B->nz; 1256 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1257 if (!rank) { 1258 header[0] = MAT_FILE_CLASSID; 1259 header[1] = mat->rmap->N; 1260 header[2] = mat->cmap->N; 1261 1262 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1263 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1264 /* get largest number of rows any processor has */ 1265 rlen = mat->rmap->n; 1266 range = mat->rmap->range; 1267 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1268 } else { 1269 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1270 rlen = mat->rmap->n; 1271 } 1272 1273 /* 
load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest process requires */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for
(j=B->i[i]; j<B->i[i+1]; j++) { 1336 if (garray[B->j[j]] > cstart) break; 1337 column_values[cnt++] = B->a[j]; 1338 } 1339 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1340 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1341 } 1342 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1343 1344 /* store the column values to the file */ 1345 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1346 if (!rank) { 1347 MPI_Status status; 1348 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1349 for (i=1; i<size; i++) { 1350 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1351 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1352 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1353 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1354 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1355 } 1356 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1357 } else { 1358 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1359 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1360 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1361 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1362 } 1363 ierr = PetscFree(column_values);CHKERRQ(ierr); 1364 1365 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1366 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #include <petscdraw.h> 1371 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1372 { 1373 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1374 PetscErrorCode ierr; 1375 PetscMPIInt rank = aij->rank,size = aij->size; 1376 PetscBool isdraw,iascii,isbinary; 1377 PetscViewer sviewer; 1378 PetscViewerFormat format; 1379 1380 PetscFunctionBegin; 1381 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1382 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1383 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1384 if (iascii) { 1385 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1386 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1387 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1388 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1389 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1390 for (i=0; i<(PetscInt)size; i++) { 1391 nmax = PetscMax(nmax,nz[i]); 1392 nmin = PetscMin(nmin,nz[i]); 1393 navg += nz[i]; 1394 } 1395 ierr = PetscFree(nz);CHKERRQ(ierr); 1396 navg = navg/size; 1397 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } 1400 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1401 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1402 MatInfo info; 1403 
PetscBool inodes; 1404 1405 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1406 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1407 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1408 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1409 if (!inodes) { 1410 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1411 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1412 } else { 1413 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1414 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1415 } 1416 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1418 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1420 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1421 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1422 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1423 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1424 PetscFunctionReturn(0); 1425 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1426 PetscInt inodecount,inodelimit,*inodes; 1427 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1428 if (inodes) { 1429 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1430 } else { 1431 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1432 } 1433 PetscFunctionReturn(0); 1434 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1435 PetscFunctionReturn(0); 1436 } 1437 } else if (isbinary) { 1438 if (size == 1) { 1439 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1440 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1441 } else { 1442 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1443 } 1444 PetscFunctionReturn(0); 1445 } else if (isdraw) { 1446 PetscDraw draw; 1447 PetscBool isnull; 1448 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1449 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1450 if (isnull) PetscFunctionReturn(0); 1451 } 1452 1453 { 1454 /* assemble the entire matrix onto first processor. 
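         Note that this builds a full copy of the matrix on rank 0 (the other ranks contribute
         their rows but hold no local part of the temporary matrix), so it can be memory-intensive
         for large matrices; it is only used by the ASCII/draw/socket viewing paths that need a
         global picture of the matrix.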
*/ 1455 Mat A; 1456 Mat_SeqAIJ *Aloc; 1457 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1458 MatScalar *a; 1459 1460 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1461 if (!rank) { 1462 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1463 } else { 1464 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1465 } 1466 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1467 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1468 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1469 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1470 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1471 1472 /* copy over the A part */ 1473 Aloc = (Mat_SeqAIJ*)aij->A->data; 1474 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1475 row = mat->rmap->rstart; 1476 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1477 for (i=0; i<m; i++) { 1478 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1479 row++; 1480 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1481 } 1482 aj = Aloc->j; 1483 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1484 1485 /* copy over the B part */ 1486 Aloc = (Mat_SeqAIJ*)aij->B->data; 1487 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1488 row = mat->rmap->rstart; 1489 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1490 ct = cols; 1491 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1492 for (i=0; i<m; i++) { 1493 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1494 row++; 1495 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1496 } 1497 ierr = PetscFree(ct);CHKERRQ(ierr); 1498 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1499 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1500 /* 1501 Everyone has to call to draw the matrix since the graphics waits are 1502 synchronized across all processors that share the PetscDraw object 1503 */ 1504 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1505 if (!rank) { 1506 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1507 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1508 } 1509 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1510 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1511 ierr = MatDestroy(&A);CHKERRQ(ierr); 1512 } 1513 PetscFunctionReturn(0); 1514 } 1515 1516 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1517 { 1518 PetscErrorCode ierr; 1519 PetscBool iascii,isdraw,issocket,isbinary; 1520 1521 PetscFunctionBegin; 1522 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1523 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1524 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1525 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1526 if (iascii || isdraw || isbinary || issocket) { 1527 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1528 } 1529 PetscFunctionReturn(0); 1530 } 1531 1532 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1533 { 1534 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1535 PetscErrorCode ierr; 1536 Vec bb1 = 0; 1537 PetscBool hasop; 1538 
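  /* The local sweep variants below (symmetric, forward, backward) all follow the same pattern:
     scatter the current solution into mat->lvec (the ghost values needed by the off-diagonal
     block B), form the modified right-hand side bb1 = bb - B*lvec, and run the sequential SOR
     kernel of the diagonal block mat->A on bb1.  Each iteration therefore (approximately)
     solves A_d * x_local = bb - B * x_ghost, with A_d the on-process diagonal block; a true
     parallel SOR is not supported and is rejected at the end of the routine. */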
1539 PetscFunctionBegin; 1540 if (flag == SOR_APPLY_UPPER) { 1541 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1542 PetscFunctionReturn(0); 1543 } 1544 1545 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1546 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1547 } 1548 1549 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1550 if (flag & SOR_ZERO_INITIAL_GUESS) { 1551 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1552 its--; 1553 } 1554 1555 while (its--) { 1556 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1557 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1558 1559 /* update rhs: bb1 = bb - B*x */ 1560 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1561 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1562 1563 /* local sweep */ 1564 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1565 } 1566 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1567 if (flag & SOR_ZERO_INITIAL_GUESS) { 1568 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1569 its--; 1570 } 1571 while (its--) { 1572 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1573 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1574 1575 /* update rhs: bb1 = bb - B*x */ 1576 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1577 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1578 1579 /* local sweep */ 1580 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1581 } 1582 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1583 if (flag & SOR_ZERO_INITIAL_GUESS) { 1584 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1585 its--; 1586 } 1587 while (its--) { 1588 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1589 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1590 1591 /* update rhs: bb1 = bb - B*x */ 1592 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1593 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1594 1595 /* local sweep */ 1596 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1597 } 1598 } else if (flag & SOR_EISENSTAT) { 1599 Vec xx1; 1600 1601 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1602 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1603 1604 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1605 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 if (!mat->diag) { 1607 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1608 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1609 } 1610 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1611 if (hasop) { 1612 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1613 } else { 1614 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1615 } 1616 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1617 1618 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1619 1620 /* local sweep */ 1621 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1622 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1623 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1624 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1625 1626 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1627 1628 matin->factorerrortype = mat->A->factorerrortype; 1629 PetscFunctionReturn(0); 1630 } 1631 1632 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1633 { 1634 Mat aA,aB,Aperm; 1635 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1636 PetscScalar *aa,*ba; 1637 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1638 PetscSF rowsf,sf; 1639 IS parcolp = NULL; 1640 PetscBool done; 1641 PetscErrorCode ierr; 1642 1643 PetscFunctionBegin; 1644 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1645 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1646 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1647 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1648 1649 /* Invert row permutation to find out where my rows should go */ 1650 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1651 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1652 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1653 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1654 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1655 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1656 1657 /* Invert column permutation to find out where my columns should go */ 1658 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1659 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1660 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1661 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1662 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1663 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1664 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1665 1666 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1667 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1668 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1669 1670 /* Find out where my gcols should go */ 1671 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1672 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1673 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1674 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1675 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1676 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1677 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1678 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1679 1680 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1681 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1682 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1683 for (i=0; i<m; i++) { 1684 PetscInt row = rdest[i],rowner; 1685 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1686 for (j=ai[i]; j<ai[i+1]; j++) { 1687 PetscInt cowner,col = cdest[aj[j]]; 1688 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to 
eliminate this search */ 1689 if (rowner == cowner) dnnz[i]++; 1690 else onnz[i]++; 1691 } 1692 for (j=bi[i]; j<bi[i+1]; j++) { 1693 PetscInt cowner,col = gcdest[bj[j]]; 1694 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1695 if (rowner == cowner) dnnz[i]++; 1696 else onnz[i]++; 1697 } 1698 } 1699 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1700 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1701 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1702 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1703 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1704 1705 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1706 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1707 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1708 for (i=0; i<m; i++) { 1709 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1710 PetscInt j0,rowlen; 1711 rowlen = ai[i+1] - ai[i]; 1712 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1713 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1714 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1715 } 1716 rowlen = bi[i+1] - bi[i]; 1717 for (j0=j=0; j<rowlen; j0=j) { 1718 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1719 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1720 } 1721 } 1722 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1723 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1724 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1725 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1726 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1727 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1728 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1729 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1730 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1731 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1732 *B = Aperm; 1733 PetscFunctionReturn(0); 1734 } 1735 1736 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1737 { 1738 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1739 PetscErrorCode ierr; 1740 1741 PetscFunctionBegin; 1742 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1743 if (ghosts) *ghosts = aij->garray; 1744 PetscFunctionReturn(0); 1745 } 1746 1747 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1748 { 1749 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1750 Mat A = mat->A,B = mat->B; 1751 PetscErrorCode ierr; 1752 PetscReal isend[5],irecv[5]; 1753 1754 PetscFunctionBegin; 1755 info->block_size = 1.0; 1756 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1757 1758 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1759 isend[3] = info->memory; isend[4] = info->mallocs; 1760 1761 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1762 1763 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1764 isend[3] += info->memory; isend[4] += info->mallocs; 1765 if (flag == MAT_LOCAL) { 1766 info->nz_used = isend[0]; 1767 info->nz_allocated = isend[1]; 1768 info->nz_unneeded = isend[2]; 1769 info->memory = isend[3]; 1770 info->mallocs = 
isend[4]; 1771 } else if (flag == MAT_GLOBAL_MAX) { 1772 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1773 1774 info->nz_used = irecv[0]; 1775 info->nz_allocated = irecv[1]; 1776 info->nz_unneeded = irecv[2]; 1777 info->memory = irecv[3]; 1778 info->mallocs = irecv[4]; 1779 } else if (flag == MAT_GLOBAL_SUM) { 1780 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1781 1782 info->nz_used = irecv[0]; 1783 info->nz_allocated = irecv[1]; 1784 info->nz_unneeded = irecv[2]; 1785 info->memory = irecv[3]; 1786 info->mallocs = irecv[4]; 1787 } 1788 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1789 info->fill_ratio_needed = 0; 1790 info->factor_mallocs = 0; 1791 PetscFunctionReturn(0); 1792 } 1793 1794 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1795 { 1796 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1797 PetscErrorCode ierr; 1798 1799 PetscFunctionBegin; 1800 switch (op) { 1801 case MAT_NEW_NONZERO_LOCATIONS: 1802 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1803 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1804 case MAT_KEEP_NONZERO_PATTERN: 1805 case MAT_NEW_NONZERO_LOCATION_ERR: 1806 case MAT_USE_INODES: 1807 case MAT_IGNORE_ZERO_ENTRIES: 1808 MatCheckPreallocated(A,1); 1809 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1810 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1811 break; 1812 case MAT_ROW_ORIENTED: 1813 MatCheckPreallocated(A,1); 1814 a->roworiented = flg; 1815 1816 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1817 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1818 break; 1819 case MAT_NEW_DIAGONALS: 1820 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1821 break; 1822 case MAT_IGNORE_OFF_PROC_ENTRIES: 1823 a->donotstash = flg; 1824 break; 1825 case MAT_SPD: 1826 A->spd_set = PETSC_TRUE; 1827 A->spd = flg; 1828 if (flg) { 1829 A->symmetric = PETSC_TRUE; 1830 A->structurally_symmetric = PETSC_TRUE; 1831 A->symmetric_set = PETSC_TRUE; 1832 A->structurally_symmetric_set = PETSC_TRUE; 1833 } 1834 break; 1835 case MAT_SYMMETRIC: 1836 MatCheckPreallocated(A,1); 1837 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1838 break; 1839 case MAT_STRUCTURALLY_SYMMETRIC: 1840 MatCheckPreallocated(A,1); 1841 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1842 break; 1843 case MAT_HERMITIAN: 1844 MatCheckPreallocated(A,1); 1845 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1846 break; 1847 case MAT_SYMMETRY_ETERNAL: 1848 MatCheckPreallocated(A,1); 1849 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1850 break; 1851 case MAT_SUBMAT_SINGLEIS: 1852 A->submat_singleis = flg; 1853 break; 1854 case MAT_STRUCTURE_ONLY: 1855 /* The option is handled directly by MatSetOption() */ 1856 break; 1857 default: 1858 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1859 } 1860 PetscFunctionReturn(0); 1861 } 1862 1863 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1864 { 1865 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1866 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1867 PetscErrorCode ierr; 1868 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1869 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1870 PetscInt *cmap,*idx_p; 1871 1872 PetscFunctionBegin; 1873 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1874 mat->getrowactive = PETSC_TRUE; 1875 1876 if (!mat->rowvalues && 
(idx || v)) { 1877 /* 1878 allocate enough space to hold information from the longest row. 1879 */ 1880 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1881 PetscInt max = 1,tmp; 1882 for (i=0; i<matin->rmap->n; i++) { 1883 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1884 if (max < tmp) max = tmp; 1885 } 1886 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1887 } 1888 1889 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1890 lrow = row - rstart; 1891 1892 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1893 if (!v) {pvA = 0; pvB = 0;} 1894 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1895 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1896 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1897 nztot = nzA + nzB; 1898 1899 cmap = mat->garray; 1900 if (v || idx) { 1901 if (nztot) { 1902 /* Sort by increasing column numbers, assuming A and B already sorted */ 1903 PetscInt imark = -1; 1904 if (v) { 1905 *v = v_p = mat->rowvalues; 1906 for (i=0; i<nzB; i++) { 1907 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1908 else break; 1909 } 1910 imark = i; 1911 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1912 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1913 } 1914 if (idx) { 1915 *idx = idx_p = mat->rowindices; 1916 if (imark > -1) { 1917 for (i=0; i<imark; i++) { 1918 idx_p[i] = cmap[cworkB[i]]; 1919 } 1920 } else { 1921 for (i=0; i<nzB; i++) { 1922 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1923 else break; 1924 } 1925 imark = i; 1926 } 1927 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1928 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1929 } 1930 } else { 1931 if (idx) *idx = 0; 1932 if (v) *v = 0; 1933 } 1934 } 1935 *nz = nztot; 1936 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1937 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1938 PetscFunctionReturn(0); 1939 } 1940 1941 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1942 { 1943 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1944 1945 PetscFunctionBegin; 1946 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1947 aij->getrowactive = PETSC_FALSE; 1948 PetscFunctionReturn(0); 1949 } 1950 1951 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1952 { 1953 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1954 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1955 PetscErrorCode ierr; 1956 PetscInt i,j,cstart = mat->cmap->rstart; 1957 PetscReal sum = 0.0; 1958 MatScalar *v; 1959 1960 PetscFunctionBegin; 1961 if (aij->size == 1) { 1962 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1963 } else { 1964 if (type == NORM_FROBENIUS) { 1965 v = amat->a; 1966 for (i=0; i<amat->nz; i++) { 1967 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1968 } 1969 v = bmat->a; 1970 for (i=0; i<bmat->nz; i++) { 1971 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1972 } 1973 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1974 *norm = PetscSqrtReal(*norm); 1975 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1976 } else if (type == NORM_1) { /* max column norm */ 1977 PetscReal *tmp,*tmp2; 1978 PetscInt *jj,*garray = aij->garray; 1979 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1980 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1981 *norm = 0.0; 1982 v = amat->a; jj = amat->j; 1983 for (j=0; j<amat->nz; j++) { 1984 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1985 } 1986 v = bmat->a; jj = bmat->j; 1987 for (j=0; j<bmat->nz; j++) { 1988 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1989 } 1990 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1991 for (j=0; j<mat->cmap->N; j++) { 1992 if (tmp2[j] > *norm) *norm = tmp2[j]; 1993 } 1994 ierr = PetscFree(tmp);CHKERRQ(ierr); 1995 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1996 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1997 } else if (type == NORM_INFINITY) { /* max row norm */ 1998 PetscReal ntemp = 0.0; 1999 for (j=0; j<aij->A->rmap->n; j++) { 2000 v = amat->a + amat->i[j]; 2001 sum = 0.0; 2002 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2003 sum += PetscAbsScalar(*v); v++; 2004 } 2005 v = bmat->a + bmat->i[j]; 2006 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2007 sum += PetscAbsScalar(*v); v++; 2008 } 2009 if (sum > ntemp) ntemp = sum; 2010 } 2011 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2012 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2013 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2014 } 2015 PetscFunctionReturn(0); 2016 } 2017 2018 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2019 { 2020 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2021 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2022 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol; 2023 PetscErrorCode ierr; 2024 Mat B,A_diag,*B_diag; 2025 MatScalar *array; 2026 2027 PetscFunctionBegin; 2028 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2029 ai = Aloc->i; aj = Aloc->j; 2030 bi = Bloc->i; bj = Bloc->j; 2031 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2032 PetscInt *d_nnz,*g_nnz,*o_nnz; 2033 PetscSFNode *oloc; 2034 PETSC_UNUSED PetscSF sf; 2035 2036 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2037 /* compute d_nnz for preallocation */ 2038 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2039 for (i=0; i<ai[ma]; i++) { 2040 d_nnz[aj[i]]++; 2041 } 2042 /* compute local off-diagonal contributions */ 2043 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2044 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2045 /* map those to global */ 2046 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2047 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2048 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2049 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2050 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2051 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2052 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2053 2054 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2055 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2056 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2057 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2058 ierr = 
MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2059 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2060 } else { 2061 B = *matout; 2062 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2063 } 2064 2065 b = (Mat_MPIAIJ*)B->data; 2066 A_diag = a->A; 2067 B_diag = &b->A; 2068 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2069 A_diag_ncol = A_diag->cmap->N; 2070 B_diag_ilen = sub_B_diag->ilen; 2071 B_diag_i = sub_B_diag->i; 2072 2073 /* Set ilen for diagonal of B */ 2074 for (i=0; i<A_diag_ncol; i++) { 2075 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2076 } 2077 2078 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2079 very quickly (=without using MatSetValues), because all writes are local. */ 2080 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2081 2082 /* copy over the B part */ 2083 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2084 array = Bloc->a; 2085 row = A->rmap->rstart; 2086 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2087 cols_tmp = cols; 2088 for (i=0; i<mb; i++) { 2089 ncol = bi[i+1]-bi[i]; 2090 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2091 row++; 2092 array += ncol; cols_tmp += ncol; 2093 } 2094 ierr = PetscFree(cols);CHKERRQ(ierr); 2095 2096 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2097 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2098 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2099 *matout = B; 2100 } else { 2101 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2102 } 2103 PetscFunctionReturn(0); 2104 } 2105 2106 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2107 { 2108 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2109 Mat a = aij->A,b = aij->B; 2110 PetscErrorCode ierr; 2111 PetscInt s1,s2,s3; 2112 2113 PetscFunctionBegin; 2114 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2115 if (rr) { 2116 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2117 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2118 /* Overlap communication with computation. 
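         The scatter of rr into aij->lvec is only started here; while it is in flight the left
         scaling of the off-diagonal block and the scaling of the diagonal block are performed,
         and the scatter is completed just before the off-diagonal block is right-scaled with
         the ghosted entries of rr.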
*/ 2119 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2120 } 2121 if (ll) { 2122 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2123 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2124 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2125 } 2126 /* scale the diagonal block */ 2127 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2128 2129 if (rr) { 2130 /* Do a scatter end and then right scale the off-diagonal block */ 2131 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2132 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2133 } 2134 PetscFunctionReturn(0); 2135 } 2136 2137 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2138 { 2139 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2140 PetscErrorCode ierr; 2141 2142 PetscFunctionBegin; 2143 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2148 { 2149 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2150 Mat a,b,c,d; 2151 PetscBool flg; 2152 PetscErrorCode ierr; 2153 2154 PetscFunctionBegin; 2155 a = matA->A; b = matA->B; 2156 c = matB->A; d = matB->B; 2157 2158 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2159 if (flg) { 2160 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2161 } 2162 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2163 PetscFunctionReturn(0); 2164 } 2165 2166 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2167 { 2168 PetscErrorCode ierr; 2169 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2170 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2171 2172 PetscFunctionBegin; 2173 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2174 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2175 /* because of the column compression in the off-processor part of the matrix a->B, 2176 the number of columns in a->B and b->B may be different, hence we cannot call 2177 the MatCopy() directly on the two parts. If need be, we can provide a more 2178 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2179 then copying the submatrices */ 2180 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2181 } else { 2182 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2183 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2184 } 2185 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2186 PetscFunctionReturn(0); 2187 } 2188 2189 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2190 { 2191 PetscErrorCode ierr; 2192 2193 PetscFunctionBegin; 2194 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2195 PetscFunctionReturn(0); 2196 } 2197 2198 /* 2199 Computes the number of nonzeros per row needed for preallocation when X and Y 2200 have different nonzero structure. 
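   For example, if a row of X has global columns {0,3,7} and the same row of Y has {3,5}, the
   merged pattern is {0,3,5,7} and the count for that row is 4; a column present in both
   matrices is counted only once.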
2201 */ 2202 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2203 { 2204 PetscInt i,j,k,nzx,nzy; 2205 2206 PetscFunctionBegin; 2207 /* Set the number of nonzeros in the new matrix */ 2208 for (i=0; i<m; i++) { 2209 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2210 nzx = xi[i+1] - xi[i]; 2211 nzy = yi[i+1] - yi[i]; 2212 nnz[i] = 0; 2213 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2214 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2215 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2216 nnz[i]++; 2217 } 2218 for (; k<nzy; k++) nnz[i]++; 2219 } 2220 PetscFunctionReturn(0); 2221 } 2222 2223 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2224 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2225 { 2226 PetscErrorCode ierr; 2227 PetscInt m = Y->rmap->N; 2228 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2229 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2230 2231 PetscFunctionBegin; 2232 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2233 PetscFunctionReturn(0); 2234 } 2235 2236 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2237 { 2238 PetscErrorCode ierr; 2239 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2240 PetscBLASInt bnz,one=1; 2241 Mat_SeqAIJ *x,*y; 2242 2243 PetscFunctionBegin; 2244 if (str == SAME_NONZERO_PATTERN) { 2245 PetscScalar alpha = a; 2246 x = (Mat_SeqAIJ*)xx->A->data; 2247 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2248 y = (Mat_SeqAIJ*)yy->A->data; 2249 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2250 x = (Mat_SeqAIJ*)xx->B->data; 2251 y = (Mat_SeqAIJ*)yy->B->data; 2252 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2253 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2254 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2255 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2256 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2257 } else { 2258 Mat B; 2259 PetscInt *nnz_d,*nnz_o; 2260 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2261 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2262 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2263 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2264 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2265 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2266 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2267 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2268 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2269 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2270 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2271 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2272 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2273 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2274 } 2275 PetscFunctionReturn(0); 2276 } 2277 2278 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2279 2280 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2281 { 2282 #if defined(PETSC_USE_COMPLEX) 2283 PetscErrorCode ierr; 2284 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2285 2286 PetscFunctionBegin; 2287 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2288 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2289 #else 2290 PetscFunctionBegin; 2291 #endif 2292 PetscFunctionReturn(0); 2293 } 2294 2295 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2296 { 2297 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2298 PetscErrorCode ierr; 2299 2300 PetscFunctionBegin; 2301 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2302 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2303 PetscFunctionReturn(0); 2304 } 2305 2306 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2307 { 2308 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2309 PetscErrorCode ierr; 2310 2311 PetscFunctionBegin; 2312 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2313 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2314 PetscFunctionReturn(0); 2315 } 2316 2317 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2318 { 2319 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2320 PetscErrorCode ierr; 2321 PetscInt i,*idxb = 0; 2322 PetscScalar *va,*vb; 2323 Vec vtmp; 2324 2325 PetscFunctionBegin; 2326 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2327 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2328 if (idx) { 2329 for (i=0; i<A->rmap->n; i++) { 2330 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2331 } 2332 } 2333 2334 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2335 if (idx) { 2336 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2337 } 2338 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2339 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2340 2341 for (i=0; i<A->rmap->n; i++) { 2342 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2343 va[i] = vb[i]; 2344 if (idx) idx[i] = a->garray[idxb[i]]; 2345 } 2346 } 2347 2348 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2349 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2350 ierr = PetscFree(idxb);CHKERRQ(ierr); 2351 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2352 PetscFunctionReturn(0); 2353 } 2354 2355 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2356 { 2357 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2358 PetscErrorCode ierr; 2359 PetscInt i,*idxb = 0; 2360 PetscScalar *va,*vb; 2361 Vec vtmp; 2362 2363 PetscFunctionBegin; 2364 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2365 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2366 if (idx) { 2367 for (i=0; i<A->cmap->n; i++) { 2368 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2369 } 2370 } 2371 2372 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2373 if (idx) { 2374 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2375 } 2376 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2377 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2378 2379 for (i=0; i<A->rmap->n; i++) { 2380 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2381 va[i] = vb[i]; 2382 if (idx) idx[i] = a->garray[idxb[i]]; 2383 } 2384 } 2385 2386 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2387 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2388 ierr = PetscFree(idxb);CHKERRQ(ierr); 2389 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2390 PetscFunctionReturn(0); 2391 } 2392 2393 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2394 { 2395 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2396 PetscInt n = A->rmap->n; 2397 PetscInt cstart = A->cmap->rstart; 2398 PetscInt *cmap = mat->garray; 2399 PetscInt *diagIdx, *offdiagIdx; 2400 Vec diagV, offdiagV; 2401 PetscScalar *a, *diagA, *offdiagA; 2402 PetscInt r; 2403 PetscErrorCode ierr; 2404 2405 PetscFunctionBegin; 2406 
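  /* Row minima are computed separately for the diagonal block (mat->A) and the off-diagonal
     block (mat->B) into sequential work vectors; for each local row the candidate with the
     smaller magnitude wins, and its local column index is translated to a global index using
     cstart (diagonal block) or the garray/cmap map (off-diagonal block). */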
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2407 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2408 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2409 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2410 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2411 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2412 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2413 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2414 for (r = 0; r < n; ++r) { 2415 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2416 a[r] = diagA[r]; 2417 idx[r] = cstart + diagIdx[r]; 2418 } else { 2419 a[r] = offdiagA[r]; 2420 idx[r] = cmap[offdiagIdx[r]]; 2421 } 2422 } 2423 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2424 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2425 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2426 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2427 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2428 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2429 PetscFunctionReturn(0); 2430 } 2431 2432 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2433 { 2434 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2435 PetscInt n = A->rmap->n; 2436 PetscInt cstart = A->cmap->rstart; 2437 PetscInt *cmap = mat->garray; 2438 PetscInt *diagIdx, *offdiagIdx; 2439 Vec diagV, offdiagV; 2440 PetscScalar *a, *diagA, *offdiagA; 2441 PetscInt r; 2442 PetscErrorCode ierr; 2443 2444 PetscFunctionBegin; 2445 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2446 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2447 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2448 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2449 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2450 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2451 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2452 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2453 for (r = 0; r < n; ++r) { 2454 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2455 a[r] = diagA[r]; 2456 idx[r] = cstart + diagIdx[r]; 2457 } else { 2458 a[r] = offdiagA[r]; 2459 idx[r] = cmap[offdiagIdx[r]]; 2460 } 2461 } 2462 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2463 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2464 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2465 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2466 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2467 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2468 PetscFunctionReturn(0); 2469 } 2470 2471 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2472 { 2473 PetscErrorCode ierr; 2474 Mat *dummy; 2475 2476 PetscFunctionBegin; 2477 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2478 *newmat = *dummy; 2479 ierr = PetscFree(dummy);CHKERRQ(ierr); 2480 PetscFunctionReturn(0); 2481 } 2482 2483 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2484 { 2485 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2486 PetscErrorCode ierr; 2487 2488 PetscFunctionBegin; 2489 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2490 A->factorerrortype = a->A->factorerrortype; 2491 PetscFunctionReturn(0); 2492 } 2493 2494 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2495 { 2496 PetscErrorCode ierr; 2497 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2498 2499 PetscFunctionBegin; 2500 
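  /* Fill both sequential blocks (diagonal A and off-diagonal B) with random values and then
     assemble, so the parallel matrix is left in a consistent assembled state. */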
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2501 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2502 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2503 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2504 PetscFunctionReturn(0); 2505 } 2506 2507 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2508 { 2509 PetscFunctionBegin; 2510 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2511 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2512 PetscFunctionReturn(0); 2513 } 2514 2515 /*@ 2516 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2517 2518 Collective on Mat 2519 2520 Input Parameters: 2521 + A - the matrix 2522 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2523 2524 Level: advanced 2525 2526 @*/ 2527 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2528 { 2529 PetscErrorCode ierr; 2530 2531 PetscFunctionBegin; 2532 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2533 PetscFunctionReturn(0); 2534 } 2535 2536 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2537 { 2538 PetscErrorCode ierr; 2539 PetscBool sc = PETSC_FALSE,flg; 2540 2541 PetscFunctionBegin; 2542 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2543 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2544 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2545 if (flg) { 2546 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2547 } 2548 ierr = PetscOptionsTail();CHKERRQ(ierr); 2549 PetscFunctionReturn(0); 2550 } 2551 2552 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2553 { 2554 PetscErrorCode ierr; 2555 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2556 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2557 2558 PetscFunctionBegin; 2559 if (!Y->preallocated) { 2560 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2561 } else if (!aij->nz) { 2562 PetscInt nonew = aij->nonew; 2563 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2564 aij->nonew = nonew; 2565 } 2566 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2567 PetscFunctionReturn(0); 2568 } 2569 2570 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2571 { 2572 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2573 PetscErrorCode ierr; 2574 2575 PetscFunctionBegin; 2576 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2577 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2578 if (d) { 2579 PetscInt rstart; 2580 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2581 *d += rstart; 2582 2583 } 2584 PetscFunctionReturn(0); 2585 } 2586 2587 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2588 { 2589 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2590 PetscErrorCode ierr; 2591 2592 PetscFunctionBegin; 2593 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2594 PetscFunctionReturn(0); 2595 } 2596 2597 /* -------------------------------------------------------------------*/ 2598 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2599 MatGetRow_MPIAIJ, 2600 
MatRestoreRow_MPIAIJ, 2601 MatMult_MPIAIJ, 2602 /* 4*/ MatMultAdd_MPIAIJ, 2603 MatMultTranspose_MPIAIJ, 2604 MatMultTransposeAdd_MPIAIJ, 2605 0, 2606 0, 2607 0, 2608 /*10*/ 0, 2609 0, 2610 0, 2611 MatSOR_MPIAIJ, 2612 MatTranspose_MPIAIJ, 2613 /*15*/ MatGetInfo_MPIAIJ, 2614 MatEqual_MPIAIJ, 2615 MatGetDiagonal_MPIAIJ, 2616 MatDiagonalScale_MPIAIJ, 2617 MatNorm_MPIAIJ, 2618 /*20*/ MatAssemblyBegin_MPIAIJ, 2619 MatAssemblyEnd_MPIAIJ, 2620 MatSetOption_MPIAIJ, 2621 MatZeroEntries_MPIAIJ, 2622 /*24*/ MatZeroRows_MPIAIJ, 2623 0, 2624 0, 2625 0, 2626 0, 2627 /*29*/ MatSetUp_MPIAIJ, 2628 0, 2629 0, 2630 MatGetDiagonalBlock_MPIAIJ, 2631 0, 2632 /*34*/ MatDuplicate_MPIAIJ, 2633 0, 2634 0, 2635 0, 2636 0, 2637 /*39*/ MatAXPY_MPIAIJ, 2638 MatCreateSubMatrices_MPIAIJ, 2639 MatIncreaseOverlap_MPIAIJ, 2640 MatGetValues_MPIAIJ, 2641 MatCopy_MPIAIJ, 2642 /*44*/ MatGetRowMax_MPIAIJ, 2643 MatScale_MPIAIJ, 2644 MatShift_MPIAIJ, 2645 MatDiagonalSet_MPIAIJ, 2646 MatZeroRowsColumns_MPIAIJ, 2647 /*49*/ MatSetRandom_MPIAIJ, 2648 0, 2649 0, 2650 0, 2651 0, 2652 /*54*/ MatFDColoringCreate_MPIXAIJ, 2653 0, 2654 MatSetUnfactored_MPIAIJ, 2655 MatPermute_MPIAIJ, 2656 0, 2657 /*59*/ MatCreateSubMatrix_MPIAIJ, 2658 MatDestroy_MPIAIJ, 2659 MatView_MPIAIJ, 2660 0, 2661 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2662 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2663 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2664 0, 2665 0, 2666 0, 2667 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2668 MatGetRowMinAbs_MPIAIJ, 2669 0, 2670 0, 2671 0, 2672 0, 2673 /*75*/ MatFDColoringApply_AIJ, 2674 MatSetFromOptions_MPIAIJ, 2675 0, 2676 0, 2677 MatFindZeroDiagonals_MPIAIJ, 2678 /*80*/ 0, 2679 0, 2680 0, 2681 /*83*/ MatLoad_MPIAIJ, 2682 MatIsSymmetric_MPIAIJ, 2683 0, 2684 0, 2685 0, 2686 0, 2687 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2688 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2689 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2690 MatPtAP_MPIAIJ_MPIAIJ, 2691 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2692 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2693 0, 2694 0, 2695 0, 2696 0, 2697 /*99*/ 0, 2698 0, 2699 0, 2700 MatConjugate_MPIAIJ, 2701 0, 2702 /*104*/MatSetValuesRow_MPIAIJ, 2703 MatRealPart_MPIAIJ, 2704 MatImaginaryPart_MPIAIJ, 2705 0, 2706 0, 2707 /*109*/0, 2708 0, 2709 MatGetRowMin_MPIAIJ, 2710 0, 2711 MatMissingDiagonal_MPIAIJ, 2712 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2713 0, 2714 MatGetGhosts_MPIAIJ, 2715 0, 2716 0, 2717 /*119*/0, 2718 0, 2719 0, 2720 0, 2721 MatGetMultiProcBlock_MPIAIJ, 2722 /*124*/MatFindNonzeroRows_MPIAIJ, 2723 MatGetColumnNorms_MPIAIJ, 2724 MatInvertBlockDiagonal_MPIAIJ, 2725 MatInvertVariableBlockDiagonal_MPIAIJ, 2726 MatCreateSubMatricesMPI_MPIAIJ, 2727 /*129*/0, 2728 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2729 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2730 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2731 0, 2732 /*134*/0, 2733 0, 2734 MatRARt_MPIAIJ_MPIAIJ, 2735 0, 2736 0, 2737 /*139*/MatSetBlockSizes_MPIAIJ, 2738 0, 2739 0, 2740 MatFDColoringSetUp_MPIXAIJ, 2741 MatFindOffBlockDiagonalEntries_MPIAIJ, 2742 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2743 }; 2744 2745 /* ----------------------------------------------------------------------------------------*/ 2746 2747 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2748 { 2749 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2750 PetscErrorCode ierr; 2751 2752 PetscFunctionBegin; 2753 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2754 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2755 PetscFunctionReturn(0); 2756 } 2757 2758 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2759 { 2760 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2761 PetscErrorCode ierr; 2762 2763 PetscFunctionBegin; 2764 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2765 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2766 PetscFunctionReturn(0); 2767 } 2768 2769 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2770 { 2771 Mat_MPIAIJ *b; 2772 PetscErrorCode ierr; 2773 2774 PetscFunctionBegin; 2775 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2776 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2777 b = (Mat_MPIAIJ*)B->data; 2778 2779 #if defined(PETSC_USE_CTABLE) 2780 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2781 #else 2782 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2783 #endif 2784 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2785 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2786 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2787 2788 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2789 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2790 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2791 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2792 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2793 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2794 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2795 2796 if (!B->preallocated) { 2797 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2798 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2799 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2800 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2801 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2802 } 2803 2804 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2805 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2806 B->preallocated = PETSC_TRUE; 2807 B->was_assembled = PETSC_FALSE; 2808 B->assembled = PETSC_FALSE;; 2809 PetscFunctionReturn(0); 2810 } 2811 2812 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2813 { 2814 Mat_MPIAIJ *b; 2815 PetscErrorCode ierr; 2816 2817 PetscFunctionBegin; 2818 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2819 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2820 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2821 b = (Mat_MPIAIJ*)B->data; 2822 2823 #if defined(PETSC_USE_CTABLE) 2824 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2825 #else 2826 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2827 #endif 2828 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2829 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2830 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2831 2832 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2833 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2834 B->preallocated = PETSC_TRUE; 2835 B->was_assembled = PETSC_FALSE; 2836 B->assembled = PETSC_FALSE; 2837 PetscFunctionReturn(0); 2838 } 2839 2840 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2841 { 2842 Mat mat; 2843 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2844 PetscErrorCode ierr; 2845 2846 PetscFunctionBegin; 2847 *newmat = 0; 2848 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2849 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2850 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2851 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 
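  /* The duplicate is built piecewise: the operations table and state flags are copied, the row
     and column layouts are referenced, the column map (colmap) and global column array (garray)
     are copied, the ghost vector lvec and the Mvctx scatter are duplicated, and finally the two
     sequential blocks A and B are duplicated (with or without values, according to cpvalues). */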
2852 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2853 a = (Mat_MPIAIJ*)mat->data; 2854 2855 mat->factortype = matin->factortype; 2856 mat->assembled = PETSC_TRUE; 2857 mat->insertmode = NOT_SET_VALUES; 2858 mat->preallocated = PETSC_TRUE; 2859 2860 a->size = oldmat->size; 2861 a->rank = oldmat->rank; 2862 a->donotstash = oldmat->donotstash; 2863 a->roworiented = oldmat->roworiented; 2864 a->rowindices = 0; 2865 a->rowvalues = 0; 2866 a->getrowactive = PETSC_FALSE; 2867 2868 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2869 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2870 2871 if (oldmat->colmap) { 2872 #if defined(PETSC_USE_CTABLE) 2873 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2874 #else 2875 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2876 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2877 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2878 #endif 2879 } else a->colmap = 0; 2880 if (oldmat->garray) { 2881 PetscInt len; 2882 len = oldmat->B->cmap->n; 2883 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2884 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2885 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2886 } else a->garray = 0; 2887 2888 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2889 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2890 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2891 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2892 2893 if (oldmat->Mvctx_mpi1) { 2894 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2895 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2896 } 2897 2898 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2899 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2900 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2901 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2902 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2903 *newmat = mat; 2904 PetscFunctionReturn(0); 2905 } 2906 2907 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2908 { 2909 PetscBool isbinary, ishdf5; 2910 PetscErrorCode ierr; 2911 2912 PetscFunctionBegin; 2913 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2914 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2915 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2916 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2917 if (isbinary) { 2918 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2919 } else if (ishdf5) { 2920 #if defined(PETSC_HAVE_HDF5) 2921 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2922 #else 2923 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2924 #endif 2925 } else { 2926 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2927 } 2928 PetscFunctionReturn(0); 2929 } 2930 2931 
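/*
   Typical caller-side use of the loaders above (a minimal sketch; the file name "matrix.dat" is
   only an example): create the matrix, give it the AIJ type, and hand it a binary viewer.  For
   an MPIAIJ matrix with a binary viewer, MatLoad() ends up in MatLoad_MPIAIJ_Binary() below;
   the block size can be overridden with -matload_block_size.

     Mat            A;
     PetscViewer    viewer;
     PetscErrorCode ierr;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/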
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2932 { 2933 PetscScalar *vals,*svals; 2934 MPI_Comm comm; 2935 PetscErrorCode ierr; 2936 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2937 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2938 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2939 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2940 PetscInt cend,cstart,n,*rowners; 2941 int fd; 2942 PetscInt bs = newMat->rmap->bs; 2943 2944 PetscFunctionBegin; 2945 /* force binary viewer to load .info file if it has not yet done so */ 2946 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2947 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2948 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2949 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2950 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2951 if (!rank) { 2952 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2953 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2954 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2955 } 2956 2957 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2958 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2959 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2960 if (bs < 0) bs = 1; 2961 2962 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2963 M = header[1]; N = header[2]; 2964 2965 /* If global sizes are set, check if they are consistent with that given in the file */ 2966 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2967 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2968 2969 /* determine ownership of all (block) rows */ 2970 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2971 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2972 else m = newMat->rmap->n; /* Set by user */ 2973 2974 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2975 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2976 2977 /* First process needs enough room for process with most rows */ 2978 if (!rank) { 2979 mmax = rowners[1]; 2980 for (i=2; i<=size; i++) { 2981 mmax = PetscMax(mmax, rowners[i]); 2982 } 2983 } else mmax = -1; /* unused, but compilers complain */ 2984 2985 rowners[0] = 0; 2986 for (i=2; i<=size; i++) { 2987 rowners[i] += rowners[i-1]; 2988 } 2989 rstart = rowners[rank]; 2990 rend = rowners[rank+1]; 2991 2992 /* distribute row lengths to all processors */ 2993 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2994 if (!rank) { 2995 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2996 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2997 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2998 for (j=0; j<m; j++) { 2999 procsnz[0] += ourlens[j]; 3000 } 3001 for (i=1; i<size; i++) { 3002 ierr = 
PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3003 /* calculate the number of nonzeros on each processor */ 3004 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3005 procsnz[i] += rowlengths[j]; 3006 } 3007 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3008 } 3009 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3010 } else { 3011 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3012 } 3013 3014 if (!rank) { 3015 /* determine max buffer needed and allocate it */ 3016 maxnz = 0; 3017 for (i=0; i<size; i++) { 3018 maxnz = PetscMax(maxnz,procsnz[i]); 3019 } 3020 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3021 3022 /* read in my part of the matrix column indices */ 3023 nz = procsnz[0]; 3024 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3025 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3026 3027 /* read in every one elses and ship off */ 3028 for (i=1; i<size; i++) { 3029 nz = procsnz[i]; 3030 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3031 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3032 } 3033 ierr = PetscFree(cols);CHKERRQ(ierr); 3034 } else { 3035 /* determine buffer space needed for message */ 3036 nz = 0; 3037 for (i=0; i<m; i++) { 3038 nz += ourlens[i]; 3039 } 3040 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3041 3042 /* receive message of column indices*/ 3043 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3044 } 3045 3046 /* determine column ownership if matrix is not square */ 3047 if (N != M) { 3048 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3049 else n = newMat->cmap->n; 3050 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3051 cstart = cend - n; 3052 } else { 3053 cstart = rstart; 3054 cend = rend; 3055 n = cend - cstart; 3056 } 3057 3058 /* loop over local rows, determining number of off diagonal entries */ 3059 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3060 jj = 0; 3061 for (i=0; i<m; i++) { 3062 for (j=0; j<ourlens[i]; j++) { 3063 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3064 jj++; 3065 } 3066 } 3067 3068 for (i=0; i<m; i++) { 3069 ourlens[i] -= offlens[i]; 3070 } 3071 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3072 3073 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3074 3075 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3076 3077 for (i=0; i<m; i++) { 3078 ourlens[i] += offlens[i]; 3079 } 3080 3081 if (!rank) { 3082 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3083 3084 /* read in my part of the matrix numerical values */ 3085 nz = procsnz[0]; 3086 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3087 3088 /* insert into matrix */ 3089 jj = rstart; 3090 smycols = mycols; 3091 svals = vals; 3092 for (i=0; i<m; i++) { 3093 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3094 smycols += ourlens[i]; 3095 svals += ourlens[i]; 3096 jj++; 3097 } 3098 3099 /* read in other processors and ship out */ 3100 for (i=1; i<size; i++) { 3101 nz = procsnz[i]; 3102 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3103 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3104 } 3105 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3106 } else { 3107 /* receive numeric values */ 3108 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3109 3110 /* receive message of values*/ 3111 ierr = 
MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3112 3113 /* insert into matrix */ 3114 jj = rstart; 3115 smycols = mycols; 3116 svals = vals; 3117 for (i=0; i<m; i++) { 3118 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3119 smycols += ourlens[i]; 3120 svals += ourlens[i]; 3121 jj++; 3122 } 3123 } 3124 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3125 ierr = PetscFree(vals);CHKERRQ(ierr); 3126 ierr = PetscFree(mycols);CHKERRQ(ierr); 3127 ierr = PetscFree(rowners);CHKERRQ(ierr); 3128 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3129 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3130 PetscFunctionReturn(0); 3131 } 3132 3133 /* Not scalable because of ISAllGather() unless getting all columns. */ 3134 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3135 { 3136 PetscErrorCode ierr; 3137 IS iscol_local; 3138 PetscBool isstride; 3139 PetscMPIInt lisstride=0,gisstride; 3140 3141 PetscFunctionBegin; 3142 /* check if we are grabbing all columns*/ 3143 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3144 3145 if (isstride) { 3146 PetscInt start,len,mstart,mlen; 3147 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3148 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3149 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3150 if (mstart == start && mlen-mstart == len) lisstride = 1; 3151 } 3152 3153 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3154 if (gisstride) { 3155 PetscInt N; 3156 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3157 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3158 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3159 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3160 } else { 3161 PetscInt cbs; 3162 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3163 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3164 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3165 } 3166 3167 *isseq = iscol_local; 3168 PetscFunctionReturn(0); 3169 } 3170 3171 /* 3172 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3173 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3174 3175 Input Parameters: 3176 mat - matrix 3177 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3178 i.e., mat->rstart <= isrow[i] < mat->rend 3179 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3180 i.e., mat->cstart <= iscol[i] < mat->cend 3181 Output Parameter: 3182 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3183 iscol_o - sequential column index set for retrieving mat->B 3184 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3185 */ 3186 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3187 { 3188 PetscErrorCode ierr; 3189 Vec x,cmap; 3190 const PetscInt *is_idx; 3191 PetscScalar *xarray,*cmaparray; 3192 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3193 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3194 Mat B=a->B; 3195 Vec lvec=a->lvec,lcmap; 3196 PetscInt i,cstart,cend,Bn=B->cmap->N; 3197 MPI_Comm comm; 3198 VecScatter Mvctx=a->Mvctx; 3199 3200 
PetscFunctionBegin; 3201 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3202 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3203 3204 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3205 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3206 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3207 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3208 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3209 3210 /* Get start indices */ 3211 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3212 isstart -= ncols; 3213 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3214 3215 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3216 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3217 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3218 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3219 for (i=0; i<ncols; i++) { 3220 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3221 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3222 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3223 } 3224 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3225 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3226 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3227 3228 /* Get iscol_d */ 3229 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3230 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3231 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3232 3233 /* Get isrow_d */ 3234 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3235 rstart = mat->rmap->rstart; 3236 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3237 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3238 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3239 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3240 3241 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3242 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3243 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3244 3245 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3246 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3247 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3248 3249 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3250 3251 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3252 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3253 3254 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3255 /* off-process column indices */ 3256 count = 0; 3257 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3258 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3259 3260 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3261 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3262 for (i=0; i<Bn; i++) { 3263 if (PetscRealPart(xarray[i]) > -1.0) { 3264 idx[count] = i; /* local column index in off-diagonal part B */ 3265 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3266 count++; 3267 } 3268 } 3269 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3270 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3271 3272 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3273 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3274 3275 ierr = PetscFree(idx);CHKERRQ(ierr); 3276 *garray = cmap1; 3277 3278 ierr = VecDestroy(&x);CHKERRQ(ierr); 3279 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3280 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3281 PetscFunctionReturn(0); 3282 } 3283 3284 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3285 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3286 { 3287 PetscErrorCode ierr; 3288 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3289 Mat M = NULL; 3290 MPI_Comm comm; 3291 IS iscol_d,isrow_d,iscol_o; 3292 Mat Asub = NULL,Bsub = NULL; 3293 PetscInt n; 3294 3295 PetscFunctionBegin; 3296 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3297 3298 if (call == MAT_REUSE_MATRIX) { 3299 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3300 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3301 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3302 3303 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3304 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3305 3306 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3307 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3308 3309 /* Update diagonal and off-diagonal portions of submat */ 3310 asub = (Mat_MPIAIJ*)(*submat)->data; 3311 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3312 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3313 if (n) { 3314 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3315 } 3316 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3317 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3318 3319 } else { /* call == MAT_INITIAL_MATRIX) */ 3320 const PetscInt *garray; 3321 PetscInt BsubN; 3322 3323 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3324 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3325 3326 /* Create local submatrices Asub and Bsub */ 3327 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3328 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3329 3330 /* Create submatrix M */ 3331 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3332 3333 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3334 asub = (Mat_MPIAIJ*)M->data; 3335 3336 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3337 n = asub->B->cmap->N; 3338 if (BsubN > n) { 3339 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3340 const PetscInt *idx; 3341 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3342 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3343 3344 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3345 j = 0; 3346 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3347 for (i=0; i<n; i++) { 3348 if (j >= BsubN) break; 3349 while (subgarray[i] > garray[j]) j++; 3350 3351 if (subgarray[i] == garray[j]) { 3352 idx_new[i] = idx[j++]; 3353 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3354 } 3355 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3356 3357 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3358 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3359 3360 } else if (BsubN < n) { 3361 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3362 } 3363 3364 ierr = PetscFree(garray);CHKERRQ(ierr); 3365 *submat = M; 3366 3367 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3368 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3369 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3370 3371 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3372 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3373 3374 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3375 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3376 } 3377 PetscFunctionReturn(0); 3378 } 3379 3380 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3381 { 3382 PetscErrorCode ierr; 3383 IS iscol_local=NULL,isrow_d; 3384 PetscInt csize; 3385 PetscInt n,i,j,start,end; 3386 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3387 MPI_Comm comm; 3388 3389 PetscFunctionBegin; 3390 /* If isrow has same processor distribution as mat, 3391 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3392 if (call == MAT_REUSE_MATRIX) { 3393 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3394 if (isrow_d) { 3395 sameRowDist = PETSC_TRUE; 3396 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3397 } else { 3398 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3399 if (iscol_local) { 3400 sameRowDist = PETSC_TRUE; 3401 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3402 } 3403 } 3404 } else { 3405 /* Check if isrow has same processor distribution as mat */ 3406 sameDist[0] 
= PETSC_FALSE; 3407 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3408 if (!n) { 3409 sameDist[0] = PETSC_TRUE; 3410 } else { 3411 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3412 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3413 if (i >= start && j < end) { 3414 sameDist[0] = PETSC_TRUE; 3415 } 3416 } 3417 3418 /* Check if iscol has same processor distribution as mat */ 3419 sameDist[1] = PETSC_FALSE; 3420 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3421 if (!n) { 3422 sameDist[1] = PETSC_TRUE; 3423 } else { 3424 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3425 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3426 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3427 } 3428 3429 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3430 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3431 sameRowDist = tsameDist[0]; 3432 } 3433 3434 if (sameRowDist) { 3435 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3436 /* isrow and iscol have same processor distribution as mat */ 3437 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3438 PetscFunctionReturn(0); 3439 } else { /* sameRowDist */ 3440 /* isrow has same processor distribution as mat */ 3441 if (call == MAT_INITIAL_MATRIX) { 3442 PetscBool sorted; 3443 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3444 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3445 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3446 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3447 3448 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3449 if (sorted) { 3450 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3451 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3452 PetscFunctionReturn(0); 3453 } 3454 } else { /* call == MAT_REUSE_MATRIX */ 3455 IS iscol_sub; 3456 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3457 if (iscol_sub) { 3458 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3459 PetscFunctionReturn(0); 3460 } 3461 } 3462 } 3463 } 3464 3465 /* General case: iscol -> iscol_local which has global size of iscol */ 3466 if (call == MAT_REUSE_MATRIX) { 3467 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3468 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3469 } else { 3470 if (!iscol_local) { 3471 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3472 } 3473 } 3474 3475 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3476 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3477 3478 if (call == MAT_INITIAL_MATRIX) { 3479 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3480 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3481 } 3482 PetscFunctionReturn(0); 3483 } 3484 3485 /*@C 3486 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3487 and "off-diagonal" part of the matrix in CSR format. 3488 3489 Collective on MPI_Comm 3490 3491 Input Parameters: 3492 + comm - MPI communicator 3493 . 
A - "diagonal" portion of matrix 3494 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3495 - garray - global index of B columns 3496 3497 Output Parameter: 3498 . mat - the matrix, with input A as its local diagonal matrix 3499 Level: advanced 3500 3501 Notes: 3502 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3503 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3504 3505 .seealso: MatCreateMPIAIJWithSplitArrays() 3506 @*/ 3507 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3508 { 3509 PetscErrorCode ierr; 3510 Mat_MPIAIJ *maij; 3511 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3512 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3513 PetscScalar *oa=b->a; 3514 Mat Bnew; 3515 PetscInt m,n,N; 3516 3517 PetscFunctionBegin; 3518 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3519 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3520 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3521 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3522 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3523 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3524 3525 /* Get global columns of mat */ 3526 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3527 3528 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3529 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3530 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3531 maij = (Mat_MPIAIJ*)(*mat)->data; 3532 3533 (*mat)->preallocated = PETSC_TRUE; 3534 3535 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3536 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3537 3538 /* Set A as diagonal portion of *mat */ 3539 maij->A = A; 3540 3541 nz = oi[m]; 3542 for (i=0; i<nz; i++) { 3543 col = oj[i]; 3544 oj[i] = garray[col]; 3545 } 3546 3547 /* Set Bnew as off-diagonal portion of *mat */ 3548 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3549 bnew = (Mat_SeqAIJ*)Bnew->data; 3550 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3551 maij->B = Bnew; 3552 3553 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3554 3555 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3556 b->free_a = PETSC_FALSE; 3557 b->free_ij = PETSC_FALSE; 3558 ierr = MatDestroy(&B);CHKERRQ(ierr); 3559 3560 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3561 bnew->free_a = PETSC_TRUE; 3562 bnew->free_ij = PETSC_TRUE; 3563 3564 /* condense columns of maij->B */ 3565 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3566 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3567 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3568 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3569 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3570 PetscFunctionReturn(0); 3571 } 3572 3573 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3574 
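/*
   A minimal sketch of how MatCreateMPIAIJWithSeqAIJ() above is meant to be called; the names
   Ad, Ao and gcols are assumed to have been built by the caller. Ad is the local "diagonal"
   SeqAIJ block (m x n), Ao is the local "off-diagonal" SeqAIJ block whose column indices are
   local to Ao, and gcols[] maps those local columns to global column numbers. Both Ad and Ao
   are taken over by the routine and must not be used by the caller afterwards.

     Mat            C;
     PetscErrorCode ierr;

     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Ad,Ao,gcols,&C);CHKERRQ(ierr);
*/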
3575 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3576 { 3577 PetscErrorCode ierr; 3578 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3579 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3580 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3581 Mat M,Msub,B=a->B; 3582 MatScalar *aa; 3583 Mat_SeqAIJ *aij; 3584 PetscInt *garray = a->garray,*colsub,Ncols; 3585 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3586 IS iscol_sub,iscmap; 3587 const PetscInt *is_idx,*cmap; 3588 PetscBool allcolumns=PETSC_FALSE; 3589 MPI_Comm comm; 3590 3591 PetscFunctionBegin; 3592 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3593 3594 if (call == MAT_REUSE_MATRIX) { 3595 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3596 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3597 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3598 3599 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3600 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3601 3602 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3603 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3604 3605 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3606 3607 } else { /* call == MAT_INITIAL_MATRIX) */ 3608 PetscBool flg; 3609 3610 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3611 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3612 3613 /* (1) iscol -> nonscalable iscol_local */ 3614 /* Check for special case: each processor gets entire matrix columns */ 3615 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3616 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3617 if (allcolumns) { 3618 iscol_sub = iscol_local; 3619 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3620 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3621 3622 } else { 3623 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3624 PetscInt *idx,*cmap1,k; 3625 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3626 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3627 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3628 count = 0; 3629 k = 0; 3630 for (i=0; i<Ncols; i++) { 3631 j = is_idx[i]; 3632 if (j >= cstart && j < cend) { 3633 /* diagonal part of mat */ 3634 idx[count] = j; 3635 cmap1[count++] = i; /* column index in submat */ 3636 } else if (Bn) { 3637 /* off-diagonal part of mat */ 3638 if (j == garray[k]) { 3639 idx[count] = j; 3640 cmap1[count++] = i; /* column index in submat */ 3641 } else if (j > garray[k]) { 3642 while (j > garray[k] && k < Bn-1) k++; 3643 if (j == garray[k]) { 3644 idx[count] = j; 3645 cmap1[count++] = i; /* column index in submat */ 3646 } 3647 } 3648 } 3649 } 3650 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3651 3652 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3653 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3654 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3655 3656 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3657 } 3658 3659 /* (3) Create sequential Msub */ 3660 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3661 } 3662 3663 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3664 aij = (Mat_SeqAIJ*)(Msub)->data; 3665 ii = aij->i; 3666 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3667 3668 /* 3669 m - number of local rows 3670 Ncols - number of columns (same on all processors) 3671 rstart - first row in new global matrix generated 3672 */ 3673 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3674 3675 if (call == MAT_INITIAL_MATRIX) { 3676 /* (4) Create parallel newmat */ 3677 PetscMPIInt rank,size; 3678 PetscInt csize; 3679 3680 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3681 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3682 3683 /* 3684 Determine the number of non-zeros in the diagonal and off-diagonal 3685 portions of the matrix in order to do correct preallocation 3686 */ 3687 3688 /* first get start and end of "diagonal" columns */ 3689 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3690 if (csize == PETSC_DECIDE) { 3691 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3692 if (mglobal == Ncols) { /* square matrix */ 3693 nlocal = m; 3694 } else { 3695 nlocal = Ncols/size + ((Ncols % size) > rank); 3696 } 3697 } else { 3698 nlocal = csize; 3699 } 3700 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3701 rstart = rend - nlocal; 3702 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3703 3704 /* next, compute all the lengths */ 3705 jj = aij->j; 3706 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3707 olens = dlens + m; 3708 for (i=0; i<m; i++) { 3709 jend = ii[i+1] - ii[i]; 3710 olen = 0; 3711 dlen = 0; 3712 for (j=0; j<jend; j++) { 3713 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3714 else dlen++; 3715 jj++; 3716 } 3717 olens[i] = olen; 3718 dlens[i] = dlen; 3719 } 3720 3721 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3722 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3723 3724 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3725 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
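    /* give M the block sizes of the index sets and the type of mat, then preallocate it with
       the exact diagonal/off-diagonal row lengths (dlens/olens) counted above */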
3726 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3727 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3728 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3729 ierr = PetscFree(dlens);CHKERRQ(ierr); 3730 3731 } else { /* call == MAT_REUSE_MATRIX */ 3732 M = *newmat; 3733 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3734 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3735 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3736 /* 3737 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3738 rather than the slower MatSetValues(). 3739 */ 3740 M->was_assembled = PETSC_TRUE; 3741 M->assembled = PETSC_FALSE; 3742 } 3743 3744 /* (5) Set values of Msub to *newmat */ 3745 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3746 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3747 3748 jj = aij->j; 3749 aa = aij->a; 3750 for (i=0; i<m; i++) { 3751 row = rstart + i; 3752 nz = ii[i+1] - ii[i]; 3753 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3754 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3755 jj += nz; aa += nz; 3756 } 3757 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3758 3759 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3760 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3761 3762 ierr = PetscFree(colsub);CHKERRQ(ierr); 3763 3764 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3765 if (call == MAT_INITIAL_MATRIX) { 3766 *newmat = M; 3767 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3768 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3769 3770 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3771 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3772 3773 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3774 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3775 3776 if (iscol_local) { 3777 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3778 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3779 } 3780 } 3781 PetscFunctionReturn(0); 3782 } 3783 3784 /* 3785 Not great since it makes two copies of the submatrix, first an SeqAIJ 3786 in local and then by concatenating the local matrices the end result. 3787 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3788 3789 Note: This requires a sequential iscol with all indices. 
3790 */ 3791 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3792 { 3793 PetscErrorCode ierr; 3794 PetscMPIInt rank,size; 3795 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3796 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3797 Mat M,Mreuse; 3798 MatScalar *aa,*vwork; 3799 MPI_Comm comm; 3800 Mat_SeqAIJ *aij; 3801 PetscBool colflag,allcolumns=PETSC_FALSE; 3802 3803 PetscFunctionBegin; 3804 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3805 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3806 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3807 3808 /* Check for special case: each processor gets entire matrix columns */ 3809 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3810 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3811 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3812 3813 if (call == MAT_REUSE_MATRIX) { 3814 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3815 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3816 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3817 } else { 3818 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3819 } 3820 3821 /* 3822 m - number of local rows 3823 n - number of columns (same on all processors) 3824 rstart - first row in new global matrix generated 3825 */ 3826 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3827 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3828 if (call == MAT_INITIAL_MATRIX) { 3829 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3830 ii = aij->i; 3831 jj = aij->j; 3832 3833 /* 3834 Determine the number of non-zeros in the diagonal and off-diagonal 3835 portions of the matrix in order to do correct preallocation 3836 */ 3837 3838 /* first get start and end of "diagonal" columns */ 3839 if (csize == PETSC_DECIDE) { 3840 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3841 if (mglobal == n) { /* square matrix */ 3842 nlocal = m; 3843 } else { 3844 nlocal = n/size + ((n % size) > rank); 3845 } 3846 } else { 3847 nlocal = csize; 3848 } 3849 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3850 rstart = rend - nlocal; 3851 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3852 3853 /* next, compute all the lengths */ 3854 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3855 olens = dlens + m; 3856 for (i=0; i<m; i++) { 3857 jend = ii[i+1] - ii[i]; 3858 olen = 0; 3859 dlen = 0; 3860 for (j=0; j<jend; j++) { 3861 if (*jj < rstart || *jj >= rend) olen++; 3862 else dlen++; 3863 jj++; 3864 } 3865 olens[i] = olen; 3866 dlens[i] = dlen; 3867 } 3868 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3869 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3870 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3871 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3872 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3873 ierr = PetscFree(dlens);CHKERRQ(ierr); 3874 } else { 3875 PetscInt ml,nl; 3876 3877 M = *newmat; 3878 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3879 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3880 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3881 /* 3882 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3883 rather than the slower MatSetValues(). 3884 */ 3885 M->was_assembled = PETSC_TRUE; 3886 M->assembled = PETSC_FALSE; 3887 } 3888 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3889 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3890 ii = aij->i; 3891 jj = aij->j; 3892 aa = aij->a; 3893 for (i=0; i<m; i++) { 3894 row = rstart + i; 3895 nz = ii[i+1] - ii[i]; 3896 cwork = jj; jj += nz; 3897 vwork = aa; aa += nz; 3898 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3899 } 3900 3901 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3902 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3903 *newmat = M; 3904 3905 /* save submatrix used in processor for next request */ 3906 if (call == MAT_INITIAL_MATRIX) { 3907 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3908 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3909 } 3910 PetscFunctionReturn(0); 3911 } 3912 3913 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3914 { 3915 PetscInt m,cstart, cend,j,nnz,i,d; 3916 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3917 const PetscInt *JJ; 3918 PetscScalar *values; 3919 PetscErrorCode ierr; 3920 PetscBool nooffprocentries; 3921 3922 PetscFunctionBegin; 3923 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3924 3925 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3926 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3927 m = B->rmap->n; 3928 cstart = B->cmap->rstart; 3929 cend = B->cmap->rend; 3930 rstart = B->rmap->rstart; 3931 3932 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3933 3934 #if defined(PETSC_USE_DEBUG) 3935 for (i=0; i<m && Ii; i++) { 3936 nnz = Ii[i+1]- Ii[i]; 3937 JJ = J + Ii[i]; 3938 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3939 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3940 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3941 } 3942 #endif 3943 3944 for (i=0; i<m && Ii; i++) { 3945 nnz = Ii[i+1]- Ii[i]; 3946 JJ = J + Ii[i]; 3947 nnz_max = PetscMax(nnz_max,nnz); 3948 d = 0; 3949 for (j=0; j<nnz; j++) { 3950 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3951 } 3952 d_nnz[i] = d; 3953 o_nnz[i] = nnz - d; 3954 } 3955 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3956 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3957 3958 if (v) values = (PetscScalar*)v; 3959 else { 3960 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3961 } 3962 3963 for (i=0; i<m && Ii; i++) { 3964 ii = i + rstart; 3965 nnz = Ii[i+1]- Ii[i]; 3966 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3967 } 3968 nooffprocentries = B->nooffprocentries; 3969 B->nooffprocentries = PETSC_TRUE; 3970 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3971 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3972 B->nooffprocentries = nooffprocentries; 3973 3974 if (!v) { 3975 ierr = PetscFree(values);CHKERRQ(ierr); 3976 } 3977 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3978 PetscFunctionReturn(0); 3979 } 3980 3981 /*@ 3982 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3983 (the default parallel PETSc format). 3984 3985 Collective on MPI_Comm 3986 3987 Input Parameters: 3988 + B - the matrix 3989 . i - the indices into j for the start of each local row (starts with zero) 3990 . j - the column indices for each local row (starts with zero) 3991 - v - optional values in the matrix 3992 3993 Level: developer 3994 3995 Notes: 3996 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3997 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3998 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3999 4000 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4001 4002 The format which is used for the sparse matrix input, is equivalent to a 4003 row-major ordering.. i.e for the following matrix, the input data expected is 4004 as shown 4005 4006 $ 1 0 0 4007 $ 2 0 3 P0 4008 $ ------- 4009 $ 4 5 6 P1 4010 $ 4011 $ Process0 [P0]: rows_owned=[0,1] 4012 $ i = {0,1,3} [size = nrow+1 = 2+1] 4013 $ j = {0,0,2} [size = 3] 4014 $ v = {1,2,3} [size = 3] 4015 $ 4016 $ Process1 [P1]: rows_owned=[2] 4017 $ i = {0,3} [size = nrow+1 = 1+1] 4018 $ j = {0,1,2} [size = 3] 4019 $ v = {4,5,6} [size = 3] 4020 4021 .keywords: matrix, aij, compressed row, sparse, parallel 4022 4023 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4024 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4025 @*/ 4026 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4027 { 4028 PetscErrorCode ierr; 4029 4030 PetscFunctionBegin; 4031 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4032 PetscFunctionReturn(0); 4033 } 4034 4035 /*@C 4036 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4037 (the default parallel PETSc format). For good matrix assembly performance 4038 the user should preallocate the matrix storage by setting the parameters 4039 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4040 performance can be increased by more than a factor of 50. 4041 4042 Collective on MPI_Comm 4043 4044 Input Parameters: 4045 + B - the matrix 4046 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4047 (same value is used for all local rows) 4048 . d_nnz - array containing the number of nonzeros in the various rows of the 4049 DIAGONAL portion of the local submatrix (possibly different for each row) 4050 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4051 The size of this array is equal to the number of local rows, i.e 'm'. 
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4206 4207 The format which is used for the sparse matrix input, is equivalent to a 4208 row-major ordering.. i.e for the following matrix, the input data expected is 4209 as shown 4210 4211 $ 1 0 0 4212 $ 2 0 3 P0 4213 $ ------- 4214 $ 4 5 6 P1 4215 $ 4216 $ Process0 [P0]: rows_owned=[0,1] 4217 $ i = {0,1,3} [size = nrow+1 = 2+1] 4218 $ j = {0,0,2} [size = 3] 4219 $ v = {1,2,3} [size = 3] 4220 $ 4221 $ Process1 [P1]: rows_owned=[2] 4222 $ i = {0,3} [size = nrow+1 = 1+1] 4223 $ j = {0,1,2} [size = 3] 4224 $ v = {4,5,6} [size = 3] 4225 4226 .keywords: matrix, aij, compressed row, sparse, parallel 4227 4228 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4229 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4230 @*/ 4231 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4232 { 4233 PetscErrorCode ierr; 4234 4235 PetscFunctionBegin; 4236 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4237 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4238 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4239 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4240 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4241 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4242 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4243 PetscFunctionReturn(0); 4244 } 4245 4246 /*@C 4247 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4248 (the default parallel PETSc format). For good matrix assembly performance 4249 the user should preallocate the matrix storage by setting the parameters 4250 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4251 performance can be increased by more than a factor of 50. 4252 4253 Collective on MPI_Comm 4254 4255 Input Parameters: 4256 + comm - MPI communicator 4257 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4258 This value should be the same as the local size used in creating the 4259 y vector for the matrix-vector product y = Ax. 4260 . n - This value should be the same as the local size used in creating the 4261 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4262 calculated if N is given) For square matrices n is almost always m. 4263 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4264 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4265 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4266 (same value is used for all local rows) 4267 . d_nnz - array containing the number of nonzeros in the various rows of the 4268 DIAGONAL portion of the local submatrix (possibly different for each row) 4269 or NULL, if d_nz is used to specify the nonzero structure. 4270 The size of this array is equal to the number of local rows, i.e 'm'. 4271 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4272 submatrix (same value is used for all local rows). 4273 - o_nnz - array containing the number of nonzeros in the various rows of the 4274 OFF-DIAGONAL portion of the local submatrix (possibly different for 4275 each row) or NULL, if o_nz is used to specify the nonzero 4276 structure. 
The size of this array is equal to the number 4277 of local rows, i.e 'm'. 4278 4279 Output Parameter: 4280 . A - the matrix 4281 4282 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4283 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4284 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4285 4286 Notes: 4287 If the *_nnz parameter is given then the *_nz parameter is ignored 4288 4289 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4290 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4291 storage requirements for this matrix. 4292 4293 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4294 processor than it must be used on all processors that share the object for 4295 that argument. 4296 4297 The user MUST specify either the local or global matrix dimensions 4298 (possibly both). 4299 4300 The parallel matrix is partitioned across processors such that the 4301 first m0 rows belong to process 0, the next m1 rows belong to 4302 process 1, the next m2 rows belong to process 2 etc.. where 4303 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4304 values corresponding to [m x N] submatrix. 4305 4306 The columns are logically partitioned with the n0 columns belonging 4307 to 0th partition, the next n1 columns belonging to the next 4308 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4309 4310 The DIAGONAL portion of the local submatrix on any given processor 4311 is the submatrix corresponding to the rows and columns m,n 4312 corresponding to the given processor. i.e diagonal matrix on 4313 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4314 etc. The remaining portion of the local submatrix [m x (N-n)] 4315 constitute the OFF-DIAGONAL portion. The example below better 4316 illustrates this concept. 4317 4318 For a square global matrix we define each processor's diagonal portion 4319 to be its local rows and the corresponding columns (a square submatrix); 4320 each processor's off-diagonal portion encompasses the remainder of the 4321 local matrix (a rectangular submatrix). 4322 4323 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4324 4325 When calling this routine with a single process communicator, a matrix of 4326 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4327 type of communicator, use the construction mechanism 4328 .vb 4329 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4330 .ve 4331 4332 $ MatCreate(...,&A); 4333 $ MatSetType(A,MATMPIAIJ); 4334 $ MatSetSizes(A, m,n,M,N); 4335 $ MatMPIAIJSetPreallocation(A,...); 4336 4337 By default, this format uses inodes (identical nodes) when possible. 4338 We search for consecutive rows with the same nonzero structure, thereby 4339 reusing matrix information to achieve increased efficiency. 4340 4341 Options Database Keys: 4342 + -mat_no_inode - Do not use inodes 4343 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4344 4345 4346 4347 Example usage: 4348 4349 Consider the following 8x8 matrix with 34 non-zero values, that is 4350 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4351 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4352 as follows 4353 4354 .vb 4355 1 2 0 | 0 3 0 | 0 4 4356 Proc0 0 5 6 | 7 0 0 | 8 0 4357 9 0 10 | 11 0 0 | 12 0 4358 ------------------------------------- 4359 13 0 14 | 15 16 17 | 0 0 4360 Proc1 0 18 0 | 19 20 21 | 0 0 4361 0 0 0 | 22 23 0 | 24 0 4362 ------------------------------------- 4363 Proc2 25 26 27 | 0 0 28 | 29 0 4364 30 0 0 | 31 32 33 | 0 34 4365 .ve 4366 4367 This can be represented as a collection of submatrices as 4368 4369 .vb 4370 A B C 4371 D E F 4372 G H I 4373 .ve 4374 4375 where the submatrices A,B,C are owned by proc0, D,E,F are 4376 owned by proc1, and G,H,I are owned by proc2. 4377 4378 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4379 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4380 The 'M','N' parameters are 8,8, and have the same values on all procs. 4381 4382 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4383 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4384 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4385 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4386 part as separate SeqAIJ matrices; for example, proc1 will store [E] as one SeqAIJ 4387 matrix and [DF] as another SeqAIJ matrix. 4388 4389 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4390 allocated for every row of the local DIAGONAL submatrix, and o_nz 4391 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4392 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4393 row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 4394 In this case, the values of d_nz,o_nz are 4395 .vb 4396 proc0 : d_nz = 2, o_nz = 2 4397 proc1 : d_nz = 3, o_nz = 2 4398 proc2 : d_nz = 1, o_nz = 4 4399 .ve 4400 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4401 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4402 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4403 34 values. 4404 4405 When the d_nnz, o_nnz parameters are specified, the storage is specified 4406 for every row of both the DIAGONAL and OFF-DIAGONAL submatrices. 4407 In the above case the values for d_nnz,o_nnz are 4408 .vb 4409 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4410 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4411 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4412 .ve 4413 Here the space allocated is the sum of all the above values, i.e., 34, and 4414 hence the preallocation is perfect.
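As a minimal sketch of the call sequence for the 8x8 example above (error checking omitted; the
placeholder values are to be filled in with the per-process m and the per-row counts listed above),
each of the 3 processes could do
.vb
Mat      A;
PetscInt m = ...;                           /* 3, 3, or 2 depending on the process */
PetscInt d_nnz[] = {...}, o_nnz[] = {...};  /* the per-row counts listed above */

MatCreateAIJ(PETSC_COMM_WORLD,m,m,8,8,0,d_nnz,0,o_nnz,&A);
/* insert this process's entries, e.g. with MatSetValues(), then assemble */
MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
Since d_nnz and o_nnz are provided, the d_nz and o_nz arguments (here 0) are ignored.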
4415 4416 Level: intermediate 4417 4418 .keywords: matrix, aij, compressed row, sparse, parallel 4419 4420 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4421 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4422 @*/ 4423 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4424 { 4425 PetscErrorCode ierr; 4426 PetscMPIInt size; 4427 4428 PetscFunctionBegin; 4429 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4430 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4431 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4432 if (size > 1) { 4433 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4434 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4435 } else { 4436 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4437 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4438 } 4439 PetscFunctionReturn(0); 4440 } 4441 4442 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4443 { 4444 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4445 PetscBool flg; 4446 PetscErrorCode ierr; 4447 4448 PetscFunctionBegin; 4449 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4450 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4451 if (Ad) *Ad = a->A; 4452 if (Ao) *Ao = a->B; 4453 if (colmap) *colmap = a->garray; 4454 PetscFunctionReturn(0); 4455 } 4456 4457 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4458 { 4459 PetscErrorCode ierr; 4460 PetscInt m,N,i,rstart,nnz,Ii; 4461 PetscInt *indx; 4462 PetscScalar *values; 4463 4464 PetscFunctionBegin; 4465 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4466 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4467 PetscInt *dnz,*onz,sum,bs,cbs; 4468 4469 if (n == PETSC_DECIDE) { 4470 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4471 } 4472 /* Check sum(n) = N */ 4473 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4474 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4475 4476 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4477 rstart -= m; 4478 4479 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4480 for (i=0; i<m; i++) { 4481 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4482 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4483 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4484 } 4485 4486 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4487 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4488 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4489 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4490 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4491 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4492 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4493 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4494 } 4495 4496 /* numeric phase */ 4497 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4498 for (i=0; i<m; i++) { 4499 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4500 Ii = i + rstart; 4501 ierr = 
MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4502 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4503 } 4504 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4505 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4506 PetscFunctionReturn(0); 4507 } 4508 4509 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4510 { 4511 PetscErrorCode ierr; 4512 PetscMPIInt rank; 4513 PetscInt m,N,i,rstart,nnz; 4514 size_t len; 4515 const PetscInt *indx; 4516 PetscViewer out; 4517 char *name; 4518 Mat B; 4519 const PetscScalar *values; 4520 4521 PetscFunctionBegin; 4522 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4523 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4524 /* Should this be the type of the diagonal block of A? */ 4525 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4526 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4527 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4528 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4529 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4530 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4531 for (i=0; i<m; i++) { 4532 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4533 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4534 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4535 } 4536 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4537 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4538 4539 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4540 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4541 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4542 sprintf(name,"%s.%d",outfile,rank); 4543 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4544 ierr = PetscFree(name);CHKERRQ(ierr); 4545 ierr = MatView(B,out);CHKERRQ(ierr); 4546 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4547 ierr = MatDestroy(&B);CHKERRQ(ierr); 4548 PetscFunctionReturn(0); 4549 } 4550 4551 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4552 { 4553 PetscErrorCode ierr; 4554 Mat_Merge_SeqsToMPI *merge; 4555 PetscContainer container; 4556 4557 PetscFunctionBegin; 4558 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4559 if (container) { 4560 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4561 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4562 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4563 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4564 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4565 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4566 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4567 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4568 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4569 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4570 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4571 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4572 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4573 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4574 ierr = PetscFree(merge);CHKERRQ(ierr); 4575 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4576 } 4577 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4578 PetscFunctionReturn(0); 4579 } 4580 4581 #include <../src/mat/utils/freespace.h> 4582 #include <petscbt.h> 4583 4584 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4585 { 4586 PetscErrorCode ierr; 4587 MPI_Comm 
comm; 4588 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4589 PetscMPIInt size,rank,taga,*len_s; 4590 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4591 PetscInt proc,m; 4592 PetscInt **buf_ri,**buf_rj; 4593 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4594 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4595 MPI_Request *s_waits,*r_waits; 4596 MPI_Status *status; 4597 MatScalar *aa=a->a; 4598 MatScalar **abuf_r,*ba_i; 4599 Mat_Merge_SeqsToMPI *merge; 4600 PetscContainer container; 4601 4602 PetscFunctionBegin; 4603 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4604 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4605 4606 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4607 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4608 4609 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4610 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4611 4612 bi = merge->bi; 4613 bj = merge->bj; 4614 buf_ri = merge->buf_ri; 4615 buf_rj = merge->buf_rj; 4616 4617 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4618 owners = merge->rowmap->range; 4619 len_s = merge->len_s; 4620 4621 /* send and recv matrix values */ 4622 /*-----------------------------*/ 4623 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4624 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4625 4626 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4627 for (proc=0,k=0; proc<size; proc++) { 4628 if (!len_s[proc]) continue; 4629 i = owners[proc]; 4630 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4631 k++; 4632 } 4633 4634 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4635 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4636 ierr = PetscFree(status);CHKERRQ(ierr); 4637 4638 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4639 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4640 4641 /* insert mat values of mpimat */ 4642 /*----------------------------*/ 4643 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4644 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4645 4646 for (k=0; k<merge->nrecv; k++) { 4647 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4648 nrows = *(buf_ri_k[k]); 4649 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4650 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4651 } 4652 4653 /* set values of ba */ 4654 m = merge->rowmap->n; 4655 for (i=0; i<m; i++) { 4656 arow = owners[rank] + i; 4657 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4658 bnzi = bi[i+1] - bi[i]; 4659 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4660 4661 /* add local non-zero vals of this proc's seqmat into ba */ 4662 anzi = ai[arow+1] - ai[arow]; 4663 aj = a->j + ai[arow]; 4664 aa = a->a + ai[arow]; 4665 nextaj = 0; 4666 for (j=0; nextaj<anzi; j++) { 4667 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4668 ba_i[j] += aa[nextaj++]; 4669 } 4670 } 4671 4672 /* add received vals into ba */ 4673 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4674 /* i-th row */ 4675 if (i == *nextrow[k]) { 4676 anzi = *(nextai[k]+1) - *nextai[k]; 4677 aj = buf_rj[k] + *(nextai[k]); 4678 aa = abuf_r[k] + *(nextai[k]); 4679 nextaj = 0; 4680 for 
(j=0; nextaj<anzi; j++) { 4681 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4682 ba_i[j] += aa[nextaj++]; 4683 } 4684 } 4685 nextrow[k]++; nextai[k]++; 4686 } 4687 } 4688 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4689 } 4690 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4691 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4692 4693 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4694 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4695 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4696 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4697 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4698 PetscFunctionReturn(0); 4699 } 4700 4701 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4702 { 4703 PetscErrorCode ierr; 4704 Mat B_mpi; 4705 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4706 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4707 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4708 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4709 PetscInt len,proc,*dnz,*onz,bs,cbs; 4710 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4711 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4712 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4713 MPI_Status *status; 4714 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4715 PetscBT lnkbt; 4716 Mat_Merge_SeqsToMPI *merge; 4717 PetscContainer container; 4718 4719 PetscFunctionBegin; 4720 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4721 4722 /* make sure it is a PETSc comm */ 4723 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4724 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4725 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4726 4727 ierr = PetscNew(&merge);CHKERRQ(ierr); 4728 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4729 4730 /* determine row ownership */ 4731 /*---------------------------------------------------------*/ 4732 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4733 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4734 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4735 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4736 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4737 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4738 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4739 4740 m = merge->rowmap->n; 4741 owners = merge->rowmap->range; 4742 4743 /* determine the number of messages to send, their lengths */ 4744 /*---------------------------------------------------------*/ 4745 len_s = merge->len_s; 4746 4747 len = 0; /* length of buf_si[] */ 4748 merge->nsend = 0; 4749 for (proc=0; proc<size; proc++) { 4750 len_si[proc] = 0; 4751 if (proc == rank) { 4752 len_s[proc] = 0; 4753 } else { 4754 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4755 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4756 } 4757 if (len_s[proc]) { 4758 merge->nsend++; 4759 nrows = 0; 4760 for (i=owners[proc]; i<owners[proc+1]; i++) { 4761 if (ai[i+1] > ai[i]) nrows++; 4762 } 4763 len_si[proc] = 2*(nrows+1); 4764 len += len_si[proc]; 4765 } 4766 } 4767 4768 /* determine the number and length of messages to receive for ij-structure */ 4769 /*-------------------------------------------------------------------------*/ 4770 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 
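  /* At this point len_s[proc] holds the number of column indices this rank will send to [proc],
     len_si[proc] holds the length of the matching i-structure message (2*(number of nonempty rows)+2),
     and merge->nrecv gives the number of incoming messages; the call below exchanges the
     per-message lengths (merge->len_r and len_ri) together with the sender ids (merge->id_r). */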
4771 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4772 4773 /* post the Irecv of j-structure */ 4774 /*-------------------------------*/ 4775 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4776 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4777 4778 /* post the Isend of j-structure */ 4779 /*--------------------------------*/ 4780 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4781 4782 for (proc=0, k=0; proc<size; proc++) { 4783 if (!len_s[proc]) continue; 4784 i = owners[proc]; 4785 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4786 k++; 4787 } 4788 4789 /* receives and sends of j-structure are complete */ 4790 /*------------------------------------------------*/ 4791 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4792 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4793 4794 /* send and recv i-structure */ 4795 /*---------------------------*/ 4796 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4797 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4798 4799 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4800 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4801 for (proc=0,k=0; proc<size; proc++) { 4802 if (!len_s[proc]) continue; 4803 /* form outgoing message for i-structure: 4804 buf_si[0]: nrows to be sent 4805 [1:nrows]: row index (global) 4806 [nrows+1:2*nrows+1]: i-structure index 4807 */ 4808 /*-------------------------------------------*/ 4809 nrows = len_si[proc]/2 - 1; 4810 buf_si_i = buf_si + nrows+1; 4811 buf_si[0] = nrows; 4812 buf_si_i[0] = 0; 4813 nrows = 0; 4814 for (i=owners[proc]; i<owners[proc+1]; i++) { 4815 anzi = ai[i+1] - ai[i]; 4816 if (anzi) { 4817 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4818 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4819 nrows++; 4820 } 4821 } 4822 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4823 k++; 4824 buf_si += len_si[proc]; 4825 } 4826 4827 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4828 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4829 4830 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4831 for (i=0; i<merge->nrecv; i++) { 4832 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4833 } 4834 4835 ierr = PetscFree(len_si);CHKERRQ(ierr); 4836 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4837 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4838 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4839 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4840 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4841 ierr = PetscFree(status);CHKERRQ(ierr); 4842 4843 /* compute a local seq matrix in each processor */ 4844 /*----------------------------------------------*/ 4845 /* allocate bi array and free space for accumulating nonzero column info */ 4846 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4847 bi[0] = 0; 4848 4849 /* create and initialize a linked list */ 4850 nlnk = N+1; 4851 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4852 4853 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4854 len = 
ai[owners[rank+1]] - ai[owners[rank]]; 4855 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4856 4857 current_space = free_space; 4858 4859 /* determine symbolic info for each local row */ 4860 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4861 4862 for (k=0; k<merge->nrecv; k++) { 4863 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4864 nrows = *buf_ri_k[k]; 4865 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4866 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4867 } 4868 4869 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4870 len = 0; 4871 for (i=0; i<m; i++) { 4872 bnzi = 0; 4873 /* add local non-zero cols of this proc's seqmat into lnk */ 4874 arow = owners[rank] + i; 4875 anzi = ai[arow+1] - ai[arow]; 4876 aj = a->j + ai[arow]; 4877 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4878 bnzi += nlnk; 4879 /* add received col data into lnk */ 4880 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4881 if (i == *nextrow[k]) { /* i-th row */ 4882 anzi = *(nextai[k]+1) - *nextai[k]; 4883 aj = buf_rj[k] + *nextai[k]; 4884 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4885 bnzi += nlnk; 4886 nextrow[k]++; nextai[k]++; 4887 } 4888 } 4889 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4890 4891 /* if free space is not available, make more free space */ 4892 if (current_space->local_remaining<bnzi) { 4893 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4894 nspacedouble++; 4895 } 4896 /* copy data into free space, then initialize lnk */ 4897 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4898 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4899 4900 current_space->array += bnzi; 4901 current_space->local_used += bnzi; 4902 current_space->local_remaining -= bnzi; 4903 4904 bi[i+1] = bi[i] + bnzi; 4905 } 4906 4907 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4908 4909 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4910 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4911 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4912 4913 /* create symbolic parallel matrix B_mpi */ 4914 /*---------------------------------------*/ 4915 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4916 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4917 if (n==PETSC_DECIDE) { 4918 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4919 } else { 4920 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4921 } 4922 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4923 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4924 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4925 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4926 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4927 4928 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4929 B_mpi->assembled = PETSC_FALSE; 4930 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4931 merge->bi = bi; 4932 merge->bj = bj; 4933 merge->buf_ri = buf_ri; 4934 merge->buf_rj = buf_rj; 4935 merge->coi = NULL; 4936 merge->coj = NULL; 4937 merge->owners_co = NULL; 4938 4939 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4940 4941 
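  /* merge->bi/bj now hold the merged CSR pattern of this rank's rows; the numerical values are
     filled in later by MatCreateMPIAIJSumSeqAIJNumeric(), which reuses the buffers kept in merge */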
/* attach the supporting struct to B_mpi for reuse */ 4942 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4943 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4944 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4945 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4946 *mpimat = B_mpi; 4947 4948 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4949 PetscFunctionReturn(0); 4950 } 4951 4952 /*@C 4953 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4954 matrices from each processor 4955 4956 Collective on MPI_Comm 4957 4958 Input Parameters: 4959 + comm - the communicators the parallel matrix will live on 4960 . seqmat - the input sequential matrices 4961 . m - number of local rows (or PETSC_DECIDE) 4962 . n - number of local columns (or PETSC_DECIDE) 4963 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4964 4965 Output Parameter: 4966 . mpimat - the parallel matrix generated 4967 4968 Level: advanced 4969 4970 Notes: 4971 The dimensions of the sequential matrix in each processor MUST be the same. 4972 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4973 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4974 @*/ 4975 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4976 { 4977 PetscErrorCode ierr; 4978 PetscMPIInt size; 4979 4980 PetscFunctionBegin; 4981 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4982 if (size == 1) { 4983 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4984 if (scall == MAT_INITIAL_MATRIX) { 4985 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4986 } else { 4987 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4988 } 4989 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4990 PetscFunctionReturn(0); 4991 } 4992 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4993 if (scall == MAT_INITIAL_MATRIX) { 4994 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4995 } 4996 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4997 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4998 PetscFunctionReturn(0); 4999 } 5000 5001 /*@ 5002 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5003 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5004 with MatGetSize() 5005 5006 Not Collective 5007 5008 Input Parameters: 5009 + A - the matrix 5010 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5011 5012 Output Parameter: 5013 . 
A_loc - the local sequential matrix generated 5014 5015 Level: developer 5016 5017 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5018 5019 @*/ 5020 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5021 { 5022 PetscErrorCode ierr; 5023 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5024 Mat_SeqAIJ *mat,*a,*b; 5025 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5026 MatScalar *aa,*ba,*cam; 5027 PetscScalar *ca; 5028 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5029 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5030 PetscBool match; 5031 MPI_Comm comm; 5032 PetscMPIInt size; 5033 5034 PetscFunctionBegin; 5035 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5036 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5037 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5038 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5039 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5040 5041 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5042 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5043 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5044 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5045 aa = a->a; ba = b->a; 5046 if (scall == MAT_INITIAL_MATRIX) { 5047 if (size == 1) { 5048 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5049 PetscFunctionReturn(0); 5050 } 5051 5052 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5053 ci[0] = 0; 5054 for (i=0; i<am; i++) { 5055 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5056 } 5057 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5058 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5059 k = 0; 5060 for (i=0; i<am; i++) { 5061 ncols_o = bi[i+1] - bi[i]; 5062 ncols_d = ai[i+1] - ai[i]; 5063 /* off-diagonal portion of A */ 5064 for (jo=0; jo<ncols_o; jo++) { 5065 col = cmap[*bj]; 5066 if (col >= cstart) break; 5067 cj[k] = col; bj++; 5068 ca[k++] = *ba++; 5069 } 5070 /* diagonal portion of A */ 5071 for (j=0; j<ncols_d; j++) { 5072 cj[k] = cstart + *aj++; 5073 ca[k++] = *aa++; 5074 } 5075 /* off-diagonal portion of A */ 5076 for (j=jo; j<ncols_o; j++) { 5077 cj[k] = cmap[*bj++]; 5078 ca[k++] = *ba++; 5079 } 5080 } 5081 /* put together the new matrix */ 5082 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5083 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5084 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5085 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5086 mat->free_a = PETSC_TRUE; 5087 mat->free_ij = PETSC_TRUE; 5088 mat->nonew = 0; 5089 } else if (scall == MAT_REUSE_MATRIX) { 5090 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5091 ci = mat->i; cj = mat->j; cam = mat->a; 5092 for (i=0; i<am; i++) { 5093 /* off-diagonal portion of A */ 5094 ncols_o = bi[i+1] - bi[i]; 5095 for (jo=0; jo<ncols_o; jo++) { 5096 col = cmap[*bj]; 5097 if (col >= cstart) break; 5098 *cam++ = *ba++; bj++; 5099 } 5100 /* diagonal portion of A */ 5101 ncols_d = ai[i+1] - ai[i]; 5102 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5103 /* off-diagonal portion of A */ 5104 for (j=jo; j<ncols_o; j++) { 5105 *cam++ = *ba++; bj++; 5106 } 5107 } 5108 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5109 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5110 PetscFunctionReturn(0); 5111 } 5112 5113 /*@C 5114 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5115 5116 Not Collective 5117 5118 Input Parameters: 5119 + A - the matrix 5120 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5121 - row, col - index sets of rows and columns to extract (or NULL) 5122 5123 Output Parameter: 5124 . A_loc - the local sequential matrix generated 5125 5126 Level: developer 5127 5128 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5129 5130 @*/ 5131 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5132 { 5133 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5134 PetscErrorCode ierr; 5135 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5136 IS isrowa,iscola; 5137 Mat *aloc; 5138 PetscBool match; 5139 5140 PetscFunctionBegin; 5141 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5142 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5143 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5144 if (!row) { 5145 start = A->rmap->rstart; end = A->rmap->rend; 5146 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5147 } else { 5148 isrowa = *row; 5149 } 5150 if (!col) { 5151 start = A->cmap->rstart; 5152 cmap = a->garray; 5153 nzA = a->A->cmap->n; 5154 nzB = a->B->cmap->n; 5155 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5156 ncols = 0; 5157 for (i=0; i<nzB; i++) { 5158 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5159 else break; 5160 } 5161 imark = i; 5162 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5163 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5164 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5165 } else { 5166 iscola = *col; 5167 } 5168 if (scall != MAT_INITIAL_MATRIX) { 5169 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5170 aloc[0] = *A_loc; 5171 } 5172 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5173 if (!col) { /* attach global id of condensed columns */ 5174 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5175 } 5176 *A_loc = aloc[0]; 5177 ierr = PetscFree(aloc);CHKERRQ(ierr); 5178 if (!row) { 5179 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5180 } 5181 if (!col) { 5182 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5183 } 5184 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5185 PetscFunctionReturn(0); 5186 } 5187 5188 /*@C 5189 MatGetBrowsOfAcols - Creates a 
SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5190 5191 Collective on Mat 5192 5193 Input Parameters: 5194 + A,B - the matrices in mpiaij format 5195 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5196 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5197 5198 Output Parameter: 5199 + rowb, colb - index sets of rows and columns of B to extract 5200 - B_seq - the sequential matrix generated 5201 5202 Level: developer 5203 5204 @*/ 5205 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5206 { 5207 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5208 PetscErrorCode ierr; 5209 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5210 IS isrowb,iscolb; 5211 Mat *bseq=NULL; 5212 5213 PetscFunctionBegin; 5214 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5215 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5216 } 5217 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5218 5219 if (scall == MAT_INITIAL_MATRIX) { 5220 start = A->cmap->rstart; 5221 cmap = a->garray; 5222 nzA = a->A->cmap->n; 5223 nzB = a->B->cmap->n; 5224 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5225 ncols = 0; 5226 for (i=0; i<nzB; i++) { /* row < local row index */ 5227 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5228 else break; 5229 } 5230 imark = i; 5231 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5232 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5233 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5234 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5235 } else { 5236 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5237 isrowb = *rowb; iscolb = *colb; 5238 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5239 bseq[0] = *B_seq; 5240 } 5241 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5242 *B_seq = bseq[0]; 5243 ierr = PetscFree(bseq);CHKERRQ(ierr); 5244 if (!rowb) { 5245 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5246 } else { 5247 *rowb = isrowb; 5248 } 5249 if (!colb) { 5250 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5251 } else { 5252 *colb = iscolb; 5253 } 5254 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5255 PetscFunctionReturn(0); 5256 } 5257 5258 #include <petsc/private/vecscatterimpl.h> 5259 /* 5260 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5261 of the OFF-DIAGONAL portion of local A 5262 5263 Collective on Mat 5264 5265 Input Parameters: 5266 + A,B - the matrices in mpiaij format 5267 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5268 5269 Output Parameter: 5270 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5271 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5272 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5273 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5274 5275 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5276 for this matrix. This is not desirable.. 
5277 5278 Level: developer 5279 5280 */ 5281 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5282 { 5283 VecScatter_MPI_General *gen_to,*gen_from; 5284 PetscErrorCode ierr; 5285 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5286 Mat_SeqAIJ *b_oth; 5287 VecScatter ctx; 5288 MPI_Comm comm; 5289 PetscMPIInt *rprocs,*sprocs,tag,rank; 5290 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5291 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5292 PetscScalar *b_otha,*bufa,*bufA,*vals; 5293 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5294 MPI_Request *rwaits = NULL,*swaits = NULL; 5295 MPI_Status *sstatus,rstatus; 5296 PetscMPIInt jj,size; 5297 VecScatterType type; 5298 PetscBool mpi1; 5299 5300 PetscFunctionBegin; 5301 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5302 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5303 5304 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5305 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5306 } 5307 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5308 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5309 5310 if (size == 1) { 5311 startsj_s = NULL; 5312 bufa_ptr = NULL; 5313 *B_oth = NULL; 5314 PetscFunctionReturn(0); 5315 } 5316 5317 ctx = a->Mvctx; 5318 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5319 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5320 if (!mpi1) { 5321 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5322 thus create a->Mvctx_mpi1 */ 5323 if (!a->Mvctx_mpi1) { 5324 a->Mvctx_mpi1_flg = PETSC_TRUE; 5325 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5326 } 5327 ctx = a->Mvctx_mpi1; 5328 } 5329 tag = ((PetscObject)ctx)->tag; 5330 5331 gen_to = (VecScatter_MPI_General*)ctx->todata; 5332 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5333 nrecvs = gen_from->n; 5334 nsends = gen_to->n; 5335 5336 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5337 srow = gen_to->indices; /* local row index to be sent */ 5338 sstarts = gen_to->starts; 5339 sprocs = gen_to->procs; 5340 sstatus = gen_to->sstatus; 5341 sbs = gen_to->bs; 5342 rstarts = gen_from->starts; 5343 rprocs = gen_from->procs; 5344 rbs = gen_from->bs; 5345 5346 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5347 if (scall == MAT_INITIAL_MATRIX) { 5348 /* i-array */ 5349 /*---------*/ 5350 /* post receives */ 5351 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5352 for (i=0; i<nrecvs; i++) { 5353 rowlen = rvalues + rstarts[i]*rbs; 5354 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5355 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5356 } 5357 5358 /* pack the outgoing message */ 5359 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5360 5361 sstartsj[0] = 0; 5362 rstartsj[0] = 0; 5363 len = 0; /* total length of j or a array to be sent */ 5364 k = 0; 5365 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5366 for (i=0; i<nsends; i++) { 5367 rowlen = svalues + sstarts[i]*sbs; 5368 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5369 for (j=0; j<nrows; j++) { 5370 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5371 for 
(l=0; l<sbs; l++) { 5372 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5373 5374 rowlen[j*sbs+l] = ncols; 5375 5376 len += ncols; 5377 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5378 } 5379 k++; 5380 } 5381 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5382 5383 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5384 } 5385 /* recvs and sends of i-array are completed */ 5386 i = nrecvs; 5387 while (i--) { 5388 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5389 } 5390 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5391 ierr = PetscFree(svalues);CHKERRQ(ierr); 5392 5393 /* allocate buffers for sending j and a arrays */ 5394 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5395 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5396 5397 /* create i-array of B_oth */ 5398 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5399 5400 b_othi[0] = 0; 5401 len = 0; /* total length of j or a array to be received */ 5402 k = 0; 5403 for (i=0; i<nrecvs; i++) { 5404 rowlen = rvalues + rstarts[i]*rbs; 5405 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5406 for (j=0; j<nrows; j++) { 5407 b_othi[k+1] = b_othi[k] + rowlen[j]; 5408 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5409 k++; 5410 } 5411 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5412 } 5413 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5414 5415 /* allocate space for j and a arrrays of B_oth */ 5416 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5417 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5418 5419 /* j-array */ 5420 /*---------*/ 5421 /* post receives of j-array */ 5422 for (i=0; i<nrecvs; i++) { 5423 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5424 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5425 } 5426 5427 /* pack the outgoing message j-array */ 5428 k = 0; 5429 for (i=0; i<nsends; i++) { 5430 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5431 bufJ = bufj+sstartsj[i]; 5432 for (j=0; j<nrows; j++) { 5433 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5434 for (ll=0; ll<sbs; ll++) { 5435 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5436 for (l=0; l<ncols; l++) { 5437 *bufJ++ = cols[l]; 5438 } 5439 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5440 } 5441 } 5442 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5443 } 5444 5445 /* recvs and sends of j-array are completed */ 5446 i = nrecvs; 5447 while (i--) { 5448 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5449 } 5450 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5451 } else if (scall == MAT_REUSE_MATRIX) { 5452 sstartsj = *startsj_s; 5453 rstartsj = *startsj_r; 5454 bufa = *bufa_ptr; 5455 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5456 b_otha = b_oth->a; 5457 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5458 5459 /* a-array */ 5460 /*---------*/ 5461 /* post receives of a-array */ 5462 for (i=0; i<nrecvs; i++) { 5463 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5464 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5465 } 5466 5467 /* pack the 
outgoing message a-array */ 5468 k = 0; 5469 for (i=0; i<nsends; i++) { 5470 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5471 bufA = bufa+sstartsj[i]; 5472 for (j=0; j<nrows; j++) { 5473 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5474 for (ll=0; ll<sbs; ll++) { 5475 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5476 for (l=0; l<ncols; l++) { 5477 *bufA++ = vals[l]; 5478 } 5479 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5480 } 5481 } 5482 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5483 } 5484 /* recvs and sends of a-array are completed */ 5485 i = nrecvs; 5486 while (i--) { 5487 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5488 } 5489 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5490 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5491 5492 if (scall == MAT_INITIAL_MATRIX) { 5493 /* put together the new matrix */ 5494 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5495 5496 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5497 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5498 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5499 b_oth->free_a = PETSC_TRUE; 5500 b_oth->free_ij = PETSC_TRUE; 5501 b_oth->nonew = 0; 5502 5503 ierr = PetscFree(bufj);CHKERRQ(ierr); 5504 if (!startsj_s || !bufa_ptr) { 5505 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5506 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5507 } else { 5508 *startsj_s = sstartsj; 5509 *startsj_r = rstartsj; 5510 *bufa_ptr = bufa; 5511 } 5512 } 5513 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5514 PetscFunctionReturn(0); 5515 } 5516 5517 /*@C 5518 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5519 5520 Not Collective 5521 5522 Input Parameters: 5523 . A - The matrix in mpiaij format 5524 5525 Output Parameter: 5526 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5527 . 
colmap - A map from global column index to local index into lvec 5528 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5529 5530 Level: developer 5531 5532 @*/ 5533 #if defined(PETSC_USE_CTABLE) 5534 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5535 #else 5536 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5537 #endif 5538 { 5539 Mat_MPIAIJ *a; 5540 5541 PetscFunctionBegin; 5542 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5543 PetscValidPointer(lvec, 2); 5544 PetscValidPointer(colmap, 3); 5545 PetscValidPointer(multScatter, 4); 5546 a = (Mat_MPIAIJ*) A->data; 5547 if (lvec) *lvec = a->lvec; 5548 if (colmap) *colmap = a->colmap; 5549 if (multScatter) *multScatter = a->Mvctx; 5550 PetscFunctionReturn(0); 5551 } 5552 5553 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5554 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5555 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5556 #if defined(PETSC_HAVE_MKL_SPARSE) 5557 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5558 #endif 5559 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5560 #if defined(PETSC_HAVE_ELEMENTAL) 5561 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5562 #endif 5563 #if defined(PETSC_HAVE_HYPRE) 5564 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5565 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5566 #endif 5567 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5568 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5569 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5570 5571 /* 5572 Computes (B'*A')' since computing B*A directly is untenable 5573 5574 n p p 5575 ( ) ( ) ( ) 5576 m ( A ) * n ( B ) = m ( C ) 5577 ( ) ( ) ( ) 5578 5579 */ 5580 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5581 { 5582 PetscErrorCode ierr; 5583 Mat At,Bt,Ct; 5584 5585 PetscFunctionBegin; 5586 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5587 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5588 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5589 ierr = MatDestroy(&At);CHKERRQ(ierr); 5590 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5591 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5592 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5593 PetscFunctionReturn(0); 5594 } 5595 5596 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5597 { 5598 PetscErrorCode ierr; 5599 PetscInt m=A->rmap->n,n=B->cmap->n; 5600 Mat Cmat; 5601 5602 PetscFunctionBegin; 5603 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5604 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5605 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5606 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5607 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5608 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5609 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5610 ierr = 
MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5611 5612 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5613 5614 *C = Cmat; 5615 PetscFunctionReturn(0); 5616 } 5617 5618 /* ----------------------------------------------------------------*/ 5619 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5620 { 5621 PetscErrorCode ierr; 5622 5623 PetscFunctionBegin; 5624 if (scall == MAT_INITIAL_MATRIX) { 5625 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5626 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5627 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5628 } 5629 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5630 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5631 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5632 PetscFunctionReturn(0); 5633 } 5634 5635 /*MC 5636 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5637 5638 Options Database Keys: 5639 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5640 5641 Level: beginner 5642 5643 .seealso: MatCreateAIJ() 5644 M*/ 5645 5646 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5647 { 5648 Mat_MPIAIJ *b; 5649 PetscErrorCode ierr; 5650 PetscMPIInt size; 5651 5652 PetscFunctionBegin; 5653 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5654 5655 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5656 B->data = (void*)b; 5657 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5658 B->assembled = PETSC_FALSE; 5659 B->insertmode = NOT_SET_VALUES; 5660 b->size = size; 5661 5662 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5663 5664 /* build cache for off array entries formed */ 5665 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5666 5667 b->donotstash = PETSC_FALSE; 5668 b->colmap = 0; 5669 b->garray = 0; 5670 b->roworiented = PETSC_TRUE; 5671 5672 /* stuff used for matrix vector multiply */ 5673 b->lvec = NULL; 5674 b->Mvctx = NULL; 5675 5676 /* stuff for MatGetRow() */ 5677 b->rowindices = 0; 5678 b->rowvalues = 0; 5679 b->getrowactive = PETSC_FALSE; 5680 5681 /* flexible pointer used in CUSP/CUSPARSE classes */ 5682 b->spptr = NULL; 5683 5684 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5685 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5686 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5687 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5688 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5689 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5690 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5691 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5692 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5693 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 5694 #if defined(PETSC_HAVE_MKL_SPARSE) 5695 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5696 #endif 5697 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5698 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5699 #if defined(PETSC_HAVE_ELEMENTAL) 5700 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5701 #endif 5702 #if defined(PETSC_HAVE_HYPRE) 5703 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5704 #endif 5705 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5706 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5707 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5708 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5709 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5710 #if defined(PETSC_HAVE_HYPRE) 5711 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5712 #endif 5713 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5714 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5715 PetscFunctionReturn(0); 5716 } 5717 5718 /*@C 5719 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5720 and "off-diagonal" part of the matrix in CSR format. 5721 5722 Collective on MPI_Comm 5723 5724 Input Parameters: 5725 + comm - MPI communicator 5726 . m - number of local rows (Cannot be PETSC_DECIDE) 5727 . n - This value should be the same as the local size used in creating the 5728 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5729 calculated if N is given) For square matrices n is almost always m. 5730 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5731 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5732 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 5733 . j - column indices 5734 . a - matrix values 5735 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 5736 . oj - column indices 5737 - oa - matrix values 5738 5739 Output Parameter: 5740 . mat - the matrix 5741 5742 Level: advanced 5743 5744 Notes: 5745 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5746 must free the arrays once the matrix has been destroyed and not before. 
5747 5748 The i and j indices are 0 based 5749 5750 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5751 5752 This sets local rows and cannot be used to set off-processor values. 5753 5754 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5755 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5756 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5757 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5758 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5759 communication if it is known that only local entries will be set. 5760 5761 .keywords: matrix, aij, compressed row, sparse, parallel 5762 5763 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5764 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5765 @*/ 5766 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5767 { 5768 PetscErrorCode ierr; 5769 Mat_MPIAIJ *maij; 5770 5771 PetscFunctionBegin; 5772 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5773 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5774 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5775 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5776 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5777 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5778 maij = (Mat_MPIAIJ*) (*mat)->data; 5779 5780 (*mat)->preallocated = PETSC_TRUE; 5781 5782 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5783 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5784 5785 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5786 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5787 5788 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5789 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5790 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5791 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5792 5793 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5794 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5795 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5796 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5797 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5798 PetscFunctionReturn(0); 5799 } 5800 5801 /* 5802 Special version for direct calls from Fortran 5803 */ 5804 #include <petsc/private/fortranimpl.h> 5805 5806 /* Change these macros so can be used in void function */ 5807 #undef CHKERRQ 5808 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5809 #undef SETERRQ2 5810 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5811 #undef SETERRQ3 5812 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5813 #undef SETERRQ 5814 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5815 
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 &&
                  !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
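/*
   For reference, a schematic (hypothetical, not the actual PETSc macro) of the per-entry insertion step that
   MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() perform above for a single
   (row,col,value) triple: binary-search the sorted column indices of the CSR row, update the entry in place
   if it exists, otherwise shift the tail of the row to insert a new nonzero. The names rp, ap, nrow, ilen,
   and found are simplifications of the variables above; reallocation on overflow and the nonew/error paths
   are omitted.

     low = 0; high = nrow;                      narrow the search window within rp[0..nrow)
     while (high - low > 5) {
       t = (low + high)/2;
       if (rp[t] > col) high = t;
       else             low  = t;
     }
     found = PETSC_FALSE;
     for (k = low; k < high; k++) {
       if (rp[k] > col) break;
       if (rp[k] == col) {                      existing nonzero: combine or overwrite
         if (addv == ADD_VALUES) ap[k] += value;
         else                    ap[k]  = value;
         found = PETSC_TRUE;
         break;
       }
     }
     if (!found) {                              new nonzero: make room at position k
       for (t = nrow; t > k; t--) { rp[t] = rp[t-1]; ap[t] = ap[t-1]; }
       rp[k]     = col;
       ap[k]     = value;
       ilen[row] = ++nrow;
     }
*/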