1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/isimpl.h> 6 #include <petscblaslapack.h> 7 #include <petscsf.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 48 { 49 PetscErrorCode ierr; 50 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 51 52 PetscFunctionBegin; 53 if (mat->A) { 54 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 55 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 56 } 57 PetscFunctionReturn(0); 58 } 59 60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 61 { 62 PetscErrorCode ierr; 63 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 64 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 65 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 66 const PetscInt *ia,*ib; 67 const MatScalar *aa,*bb; 68 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 69 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 70 71 PetscFunctionBegin; 72 *keptrows = 0; 73 ia = a->i; 74 ib = b->i; 75 for (i=0; i<m; i++) { 76 na = ia[i+1] - ia[i]; 77 nb = ib[i+1] - ib[i]; 78 if (!na && !nb) { 79 cnt++; 80 goto ok1; 81 } 82 aa = a->a + ia[i]; 83 for (j=0; j<na; j++) { 84 if (aa[j] != 0.0) goto ok1; 85 } 86 bb = b->a + ib[i]; 87 for (j=0; j <nb; j++) { 88 if (bb[j] != 0.0) goto ok1; 89 } 90 cnt++; 91 ok1:; 92 } 93 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 94 if (!n0rows) PetscFunctionReturn(0); 95 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 96 cnt = 0; 97 for (i=0; i<m; i++) { 98 na = ia[i+1] - ia[i]; 99 nb = ib[i+1] - ib[i]; 100 if (!na && !nb) continue; 101 aa = a->a + ia[i]; 102 for (j=0; j<na;j++) { 103 if (aa[j] != 0.0) { 104 rows[cnt++] = rstart + i; 105 goto ok2; 106 } 107 } 108 bb = b->a + ib[i]; 109 for (j=0; j<nb; j++) { 110 if (bb[j] != 0.0) { 111 rows[cnt++] = rstart + i; 112 goto 
ok2; 113 } 114 } 115 ok2:; 116 } 117 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 122 { 123 PetscErrorCode ierr; 124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 125 PetscBool cong; 126 127 PetscFunctionBegin; 128 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 129 if (Y->assembled && cong) { 130 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 131 } else { 132 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 133 } 134 PetscFunctionReturn(0); 135 } 136 137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 138 { 139 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 140 PetscErrorCode ierr; 141 PetscInt i,rstart,nrows,*rows; 142 143 PetscFunctionBegin; 144 *zrows = NULL; 145 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 146 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 147 for (i=0; i<nrows; i++) rows[i] += rstart; 148 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 153 { 154 PetscErrorCode ierr; 155 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 156 PetscInt i,n,*garray = aij->garray; 157 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 158 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 159 PetscReal *work; 160 161 PetscFunctionBegin; 162 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 163 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 164 if (type == NORM_2) { 165 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 166 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 167 } 168 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 169 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 170 } 171 } else if (type == NORM_1) { 172 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 173 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 174 } 175 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 176 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 177 } 178 } else if (type == NORM_INFINITY) { 179 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 180 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 181 } 182 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 183 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 184 } 185 186 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 187 if (type == NORM_INFINITY) { 188 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 189 } else { 190 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 191 } 192 ierr = PetscFree(work);CHKERRQ(ierr); 193 if (type == NORM_2) { 194 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 195 } 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 200 { 201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 202 IS sis,gis; 203 PetscErrorCode ierr; 204 const PetscInt *isis,*igis; 205 PetscInt n,*iis,nsis,ngis,rstart,i; 206 207 PetscFunctionBegin; 208 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 209 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 210 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 211 ierr 
= ISGetSize(sis,&nsis);CHKERRQ(ierr); 212 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 213 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 214 215 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 216 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 217 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 218 n = ngis + nsis; 219 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 220 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 221 for (i=0; i<n; i++) iis[i] += rstart; 222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 223 224 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 225 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 226 ierr = ISDestroy(&sis);CHKERRQ(ierr); 227 ierr = ISDestroy(&gis);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 /* 232 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 233 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 234 235 Only for square matrices 236 237 Used by a preconditioner, hence PETSC_EXTERN 238 */ 239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 240 { 241 PetscMPIInt rank,size; 242 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 243 PetscErrorCode ierr; 244 Mat mat; 245 Mat_SeqAIJ *gmata; 246 PetscMPIInt tag; 247 MPI_Status status; 248 PetscBool aij; 249 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 254 if (!rank) { 255 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 256 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 257 } 258 if (reuse == MAT_INITIAL_MATRIX) { 259 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 260 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 261 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 262 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 263 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 264 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 265 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 266 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 267 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 268 269 rowners[0] = 0; 270 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 271 rstart = rowners[rank]; 272 rend = rowners[rank+1]; 273 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 274 if (!rank) { 275 gmata = (Mat_SeqAIJ*) gmat->data; 276 /* send row lengths to all processors */ 277 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 278 for (i=1; i<size; i++) { 279 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 280 } 281 /* determine number diagonal and off-diagonal counts */ 282 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 283 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 284 jj = 0; 285 for (i=0; i<m; i++) { 286 for (j=0; j<dlens[i]; j++) { 287 if (gmata->j[jj] < rstart) ld[i]++; 288 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 289 jj++; 290 } 291 } 292 /* send column indices to other processes */ 293 for (i=1; i<size; i++) { 294 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 295 ierr = 
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 296 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 } 298 299 /* send numerical values to other processes */ 300 for (i=1; i<size; i++) { 301 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 302 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 303 } 304 gmataa = gmata->a; 305 gmataj = gmata->j; 306 307 } else { 308 /* receive row lengths */ 309 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* receive column indices */ 311 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 312 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 313 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 314 /* determine number diagonal and off-diagonal counts */ 315 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 316 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 317 jj = 0; 318 for (i=0; i<m; i++) { 319 for (j=0; j<dlens[i]; j++) { 320 if (gmataj[jj] < rstart) ld[i]++; 321 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 322 jj++; 323 } 324 } 325 /* receive numerical values */ 326 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 327 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 328 } 329 /* set preallocation */ 330 for (i=0; i<m; i++) { 331 dlens[i] -= olens[i]; 332 } 333 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 334 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 335 336 for (i=0; i<m; i++) { 337 dlens[i] += olens[i]; 338 } 339 cnt = 0; 340 for (i=0; i<m; i++) { 341 row = rstart + i; 342 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 343 cnt += dlens[i]; 344 } 345 if (rank) { 346 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 347 } 348 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 349 ierr = PetscFree(rowners);CHKERRQ(ierr); 350 351 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 352 353 *inmat = mat; 354 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 355 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 356 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 357 mat = *inmat; 358 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 359 if (!rank) { 360 /* send numerical values to other processes */ 361 gmata = (Mat_SeqAIJ*) gmat->data; 362 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 363 gmataa = gmata->a; 364 for (i=1; i<size; i++) { 365 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 366 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 367 } 368 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 369 } else { 370 /* receive numerical values from process 0*/ 371 nz = Ad->nz + Ao->nz; 372 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 373 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 374 } 375 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 376 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 377 ad = Ad->a; 378 ao = Ao->a; 379 if (mat->rmap->n) { 380 i = 0; 381 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 for (i=1; i<mat->rmap->n; i++) { 
385 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 386 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 387 } 388 i--; 389 if (mat->rmap->n) { 390 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 391 } 392 if (rank) { 393 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 394 } 395 } 396 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 398 PetscFunctionReturn(0); 399 } 400 401 /* 402 Local utility routine that creates a mapping from the global column 403 number to the local number in the off-diagonal part of the local 404 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 405 a slightly higher hash table cost; without it it is not scalable (each processor 406 has an order N integer array but is fast to acess. 407 */ 408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt n = aij->B->cmap->n,i; 413 414 PetscFunctionBegin; 415 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 416 #if defined(PETSC_USE_CTABLE) 417 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 418 for (i=0; i<n; i++) { 419 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 420 } 421 #else 422 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 423 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 424 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 425 #endif 426 PetscFunctionReturn(0); 427 } 428 429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 430 { \ 431 if (col <= lastcol1) low1 = 0; \ 432 else high1 = nrow1; \ 433 lastcol1 = col;\ 434 while (high1-low1 > 5) { \ 435 t = (low1+high1)/2; \ 436 if (rp1[t] > col) high1 = t; \ 437 else low1 = t; \ 438 } \ 439 for (_i=low1; _i<high1; _i++) { \ 440 if (rp1[_i] > col) break; \ 441 if (rp1[_i] == col) { \ 442 if (addv == ADD_VALUES) ap1[_i] += value; \ 443 else ap1[_i] = value; \ 444 goto a_noinsert; \ 445 } \ 446 } \ 447 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 448 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 449 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 450 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 451 N = nrow1++ - 1; a->nz++; high1++; \ 452 /* shift up all the later entries in this row */ \ 453 for (ii=N; ii>=_i; ii--) { \ 454 rp1[ii+1] = rp1[ii]; \ 455 ap1[ii+1] = ap1[ii]; \ 456 } \ 457 rp1[_i] = col; \ 458 ap1[_i] = value; \ 459 A->nonzerostate++;\ 460 a_noinsert: ; \ 461 ailen[row] = nrow1; \ 462 } 463 464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 465 { \ 466 if (col <= lastcol2) low2 = 0; \ 467 else high2 = nrow2; \ 468 lastcol2 = col; \ 469 while (high2-low2 > 5) { \ 470 t = (low2+high2)/2; \ 471 if (rp2[t] > col) high2 = t; \ 472 else low2 = t; \ 473 } \ 474 for (_i=low2; _i<high2; _i++) { \ 475 if (rp2[_i] > col) break; \ 476 if (rp2[_i] == col) { \ 477 if (addv == ADD_VALUES) ap2[_i] += value; \ 478 else ap2[_i] 
= value; \ 479 goto b_noinsert; \ 480 } \ 481 } \ 482 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 484 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 485 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 486 N = nrow2++ - 1; b->nz++; high2++; \ 487 /* shift up all the later entries in this row */ \ 488 for (ii=N; ii>=_i; ii--) { \ 489 rp2[ii+1] = rp2[ii]; \ 490 ap2[ii+1] = ap2[ii]; \ 491 } \ 492 rp2[_i] = col; \ 493 ap2[_i] = value; \ 494 B->nonzerostate++; \ 495 b_noinsert: ; \ 496 bilen[row] = nrow2; \ 497 } 498 499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 500 { 501 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 502 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 503 PetscErrorCode ierr; 504 PetscInt l,*garray = mat->garray,diag; 505 506 PetscFunctionBegin; 507 /* code only works for square matrices A */ 508 509 /* find size of row to the left of the diagonal part */ 510 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 511 row = row - diag; 512 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 513 if (garray[b->j[b->i[row]+l]] > diag) break; 514 } 515 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* diagonal part */ 518 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 519 520 /* right of diagonal part */ 521 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 526 { 527 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 528 PetscScalar value; 529 PetscErrorCode ierr; 530 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 531 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 532 PetscBool roworiented = aij->roworiented; 533 534 /* Some Variables required in the macro */ 535 Mat A = aij->A; 536 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 537 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 538 MatScalar *aa = a->a; 539 PetscBool ignorezeroentries = a->ignorezeroentries; 540 Mat B = aij->B; 541 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 542 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 543 MatScalar *ba = b->a; 544 545 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 546 PetscInt nonew; 547 MatScalar *ap1,*ap2; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (im[i] < 0) continue; 552 #if defined(PETSC_USE_DEBUG) 553 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 554 #endif 555 if (im[i] >= rstart && im[i] < rend) { 556 row = im[i] - rstart; 557 lastcol1 = -1; 558 rp1 = aj + ai[row]; 559 ap1 = aa + ai[row]; 560 rmax1 = aimax[row]; 561 nrow1 = ailen[row]; 562 low1 = 0; 563 high1 = nrow1; 564 lastcol2 = -1; 565 rp2 = bj + bi[row]; 566 ap2 = ba + bi[row]; 567 rmax2 = bimax[row]; 568 nrow2 = bilen[row]; 569 low2 = 0; 570 high2 = nrow2; 571 572 for (j=0; j<n; j++) { 573 if (roworiented) value = v[i*n+j]; 574 else value = v[i+j*m]; 
575 if (in[j] >= cstart && in[j] < cend) { 576 col = in[j] - cstart; 577 nonew = a->nonew; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 579 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 580 } else if (in[j] < 0) continue; 581 #if defined(PETSC_USE_DEBUG) 582 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 583 #endif 584 else { 585 if (mat->was_assembled) { 586 if (!aij->colmap) { 587 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 #if defined(PETSC_USE_CTABLE) 590 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 591 col--; 592 #else 593 col = aij->colmap[in[j]] - 1; 594 #endif 595 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 596 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 597 col = in[j]; 598 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 599 B = aij->B; 600 b = (Mat_SeqAIJ*)B->data; 601 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 602 rp2 = bj + bi[row]; 603 ap2 = ba + bi[row]; 604 rmax2 = bimax[row]; 605 nrow2 = bilen[row]; 606 low2 = 0; 607 high2 = nrow2; 608 bm = aij->B->rmap->n; 609 ba = b->a; 610 } else if (col < 0) { 611 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 612 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 613 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } 615 } else col = in[j]; 616 nonew = b->nonew; 617 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 618 } 619 } 620 } else { 621 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 622 if (!aij->donotstash) { 623 mat->assembled = PETSC_FALSE; 624 if (roworiented) { 625 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 626 } else { 627 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 628 } 629 } 630 } 631 } 632 PetscFunctionReturn(0); 633 } 634 635 /* 636 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 637 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 638 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
639 */ 640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 641 { 642 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 643 Mat A = aij->A; /* diagonal part of the matrix */ 644 Mat B = aij->B; /* offdiagonal part of the matrix */ 645 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 646 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 647 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 648 PetscInt *ailen = a->ilen,*aj = a->j; 649 PetscInt *bilen = b->ilen,*bj = b->j; 650 PetscInt am = aij->A->rmap->n,j; 651 PetscInt diag_so_far = 0,dnz; 652 PetscInt offd_so_far = 0,onz; 653 654 PetscFunctionBegin; 655 /* Iterate over all rows of the matrix */ 656 for (j=0; j<am; j++) { 657 dnz = onz = 0; 658 /* Iterate over all non-zero columns of the current row */ 659 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 660 /* If column is in the diagonal */ 661 if (mat_j[col] >= cstart && mat_j[col] < cend) { 662 aj[diag_so_far++] = mat_j[col] - cstart; 663 dnz++; 664 } else { /* off-diagonal entries */ 665 bj[offd_so_far++] = mat_j[col]; 666 onz++; 667 } 668 } 669 ailen[j] = dnz; 670 bilen[j] = onz; 671 } 672 PetscFunctionReturn(0); 673 } 674 675 /* 676 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 677 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 678 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 679 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 680 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 681 */ 682 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 683 { 684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 685 Mat A = aij->A; /* diagonal part of the matrix */ 686 Mat B = aij->B; /* offdiagonal part of the matrix */ 687 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 688 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 689 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 690 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 691 PetscInt *ailen = a->ilen,*aj = a->j; 692 PetscInt *bilen = b->ilen,*bj = b->j; 693 PetscInt am = aij->A->rmap->n,j; 694 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 695 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 696 PetscScalar *aa = a->a,*ba = b->a; 697 698 PetscFunctionBegin; 699 /* Iterate over all rows of the matrix */ 700 for (j=0; j<am; j++) { 701 dnz_row = onz_row = 0; 702 rowstart_offd = full_offd_i[j]; 703 rowstart_diag = full_diag_i[j]; 704 /* Iterate over all non-zero columns of the current row */ 705 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 706 /* If column is in the diagonal */ 707 if (mat_j[col] >= cstart && mat_j[col] < cend) { 708 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 709 aa[rowstart_diag+dnz_row] = mat_a[col]; 710 dnz_row++; 711 } else { /* off-diagonal entries */ 712 bj[rowstart_offd+onz_row] = mat_j[col]; 713 ba[rowstart_offd+onz_row] = mat_a[col]; 714 onz_row++; 715 } 716 } 717 ailen[j] = dnz_row; 718 bilen[j] = onz_row; 719 } 720 PetscFunctionReturn(0); 721 } 722 723 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 724 { 725 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 726 PetscErrorCode ierr; 727 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 728 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 729 730 PetscFunctionBegin; 731 for (i=0; i<m; i++) { 732 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 733 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 734 if (idxm[i] >= rstart && idxm[i] < rend) { 735 row = idxm[i] - rstart; 736 for (j=0; j<n; j++) { 737 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 738 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 739 if (idxn[j] >= cstart && idxn[j] < cend) { 740 col = idxn[j] - cstart; 741 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 742 } else { 743 if (!aij->colmap) { 744 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 745 } 746 #if defined(PETSC_USE_CTABLE) 747 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 748 col--; 749 #else 750 col = aij->colmap[idxn[j]] - 1; 751 #endif 752 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 753 else { 754 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 755 } 756 } 757 } 758 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 759 } 760 PetscFunctionReturn(0); 761 } 762 763 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 764 765 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 766 { 767 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 768 PetscErrorCode ierr; 769 PetscInt nstash,reallocs; 770 771 PetscFunctionBegin; 772 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 773 774 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 775 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 776 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 777 PetscFunctionReturn(0); 778 } 779 780 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 781 { 782 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 783 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 784 PetscErrorCode ierr; 785 PetscMPIInt n; 786 PetscInt i,j,rstart,ncols,flg; 787 PetscInt *row,*col; 788 
PetscBool other_disassembled; 789 PetscScalar *val; 790 791 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 792 793 PetscFunctionBegin; 794 if (!aij->donotstash && !mat->nooffprocentries) { 795 while (1) { 796 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 797 if (!flg) break; 798 799 for (i=0; i<n; ) { 800 /* Now identify the consecutive vals belonging to the same row */ 801 for (j=i,rstart=row[j]; j<n; j++) { 802 if (row[j] != rstart) break; 803 } 804 if (j < n) ncols = j-i; 805 else ncols = n-i; 806 /* Now assemble all these values with a single function call */ 807 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 808 809 i = j; 810 } 811 } 812 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 813 } 814 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 815 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 816 817 /* determine if any processor has disassembled, if so we must 818 also disassemble ourselfs, in order that we may reassemble. */ 819 /* 820 if nonzero structure of submatrix B cannot change then we know that 821 no processor disassembled thus we can skip this stuff 822 */ 823 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 824 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 825 if (mat->was_assembled && !other_disassembled) { 826 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 827 } 828 } 829 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 830 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 831 } 832 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 833 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 834 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 835 836 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 837 838 aij->rowvalues = 0; 839 840 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 841 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 842 843 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 844 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 845 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 846 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 847 } 848 PetscFunctionReturn(0); 849 } 850 851 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 852 { 853 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 854 PetscErrorCode ierr; 855 856 PetscFunctionBegin; 857 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 858 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 863 { 864 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 865 PetscInt *lrows; 866 PetscInt r, len; 867 PetscBool cong; 868 PetscErrorCode ierr; 869 870 PetscFunctionBegin; 871 /* get locally owned rows */ 872 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 873 /* fix right hand side if needed */ 874 if (x && b) { 875 const PetscScalar *xx; 876 PetscScalar *bb; 877 878 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 879 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 880 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 881 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 882 ierr = 
VecRestoreArray(b, &bb);CHKERRQ(ierr); 883 } 884 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 885 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 886 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 887 if ((diag != 0.0) && cong) { 888 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 889 } else if (diag != 0.0) { 890 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 891 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 892 for (r = 0; r < len; ++r) { 893 const PetscInt row = lrows[r] + A->rmap->rstart; 894 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 895 } 896 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 897 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 898 } else { 899 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 900 } 901 ierr = PetscFree(lrows);CHKERRQ(ierr); 902 903 /* only change matrix nonzero state if pattern was allowed to be changed */ 904 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 905 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 906 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 907 } 908 PetscFunctionReturn(0); 909 } 910 911 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 912 { 913 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 914 PetscErrorCode ierr; 915 PetscMPIInt n = A->rmap->n; 916 PetscInt i,j,r,m,p = 0,len = 0; 917 PetscInt *lrows,*owners = A->rmap->range; 918 PetscSFNode *rrows; 919 PetscSF sf; 920 const PetscScalar *xx; 921 PetscScalar *bb,*mask; 922 Vec xmask,lmask; 923 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 924 const PetscInt *aj, *ii,*ridx; 925 PetscScalar *aa; 926 927 PetscFunctionBegin; 928 /* Create SF where leaves are input rows and roots are owned rows */ 929 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 930 for (r = 0; r < n; ++r) lrows[r] = -1; 931 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 932 for (r = 0; r < N; ++r) { 933 const PetscInt idx = rows[r]; 934 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 935 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 936 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 937 } 938 rrows[r].rank = p; 939 rrows[r].index = rows[r] - owners[p]; 940 } 941 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 942 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 943 /* Collect flags for rows to be zeroed */ 944 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 945 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 946 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 947 /* Compress and put in row numbers */ 948 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 949 /* zero diagonal part of matrix */ 950 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 951 /* handle off diagonal part of matrix */ 952 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 953 ierr 
= VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 954 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 955 for (i=0; i<len; i++) bb[lrows[i]] = 1; 956 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 957 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 958 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 960 if (x) { 961 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 962 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 964 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 965 } 966 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 967 /* remove zeroed rows of off diagonal matrix */ 968 ii = aij->i; 969 for (i=0; i<len; i++) { 970 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 971 } 972 /* loop over all elements of off process part of matrix zeroing removed columns*/ 973 if (aij->compressedrow.use) { 974 m = aij->compressedrow.nrows; 975 ii = aij->compressedrow.i; 976 ridx = aij->compressedrow.rindex; 977 for (i=0; i<m; i++) { 978 n = ii[i+1] - ii[i]; 979 aj = aij->j + ii[i]; 980 aa = aij->a + ii[i]; 981 982 for (j=0; j<n; j++) { 983 if (PetscAbsScalar(mask[*aj])) { 984 if (b) bb[*ridx] -= *aa*xx[*aj]; 985 *aa = 0.0; 986 } 987 aa++; 988 aj++; 989 } 990 ridx++; 991 } 992 } else { /* do not use compressed row format */ 993 m = l->B->rmap->n; 994 for (i=0; i<m; i++) { 995 n = ii[i+1] - ii[i]; 996 aj = aij->j + ii[i]; 997 aa = aij->a + ii[i]; 998 for (j=0; j<n; j++) { 999 if (PetscAbsScalar(mask[*aj])) { 1000 if (b) bb[i] -= *aa*xx[*aj]; 1001 *aa = 0.0; 1002 } 1003 aa++; 1004 aj++; 1005 } 1006 } 1007 } 1008 if (x) { 1009 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1010 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1011 } 1012 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1013 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1014 ierr = PetscFree(lrows);CHKERRQ(ierr); 1015 1016 /* only change matrix nonzero state if pattern was allowed to be changed */ 1017 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1018 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1019 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1020 } 1021 PetscFunctionReturn(0); 1022 } 1023 1024 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1025 { 1026 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1027 PetscErrorCode ierr; 1028 PetscInt nt; 1029 VecScatter Mvctx = a->Mvctx; 1030 1031 PetscFunctionBegin; 1032 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1033 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1034 1035 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1037 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1038 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1039 PetscFunctionReturn(0); 1040 } 1041 1042 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1043 { 1044 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1045 PetscErrorCode ierr; 1046 1047 PetscFunctionBegin; 1048 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatMultAdd_MPIAIJ(Mat 
A,Vec xx,Vec yy,Vec zz) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1055 PetscErrorCode ierr; 1056 VecScatter Mvctx = a->Mvctx; 1057 1058 PetscFunctionBegin; 1059 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1060 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1062 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1063 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1068 { 1069 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1070 PetscErrorCode ierr; 1071 PetscBool merged; 1072 1073 PetscFunctionBegin; 1074 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1075 /* do nondiagonal part */ 1076 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1077 if (!merged) { 1078 /* send it on its way */ 1079 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1080 /* do local part */ 1081 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1082 /* receive remote parts: note this assumes the values are not actually */ 1083 /* added in yy until the next line, */ 1084 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1085 } else { 1086 /* do local part */ 1087 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1088 /* send it on its way */ 1089 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1090 /* values actually were received in the Begin() but we need to call this nop */ 1091 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1092 } 1093 PetscFunctionReturn(0); 1094 } 1095 1096 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1097 { 1098 MPI_Comm comm; 1099 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1100 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1101 IS Me,Notme; 1102 PetscErrorCode ierr; 1103 PetscInt M,N,first,last,*notme,i; 1104 PetscMPIInt size; 1105 1106 PetscFunctionBegin; 1107 /* Easy test: symmetric diagonal block */ 1108 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1109 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1110 if (!*f) PetscFunctionReturn(0); 1111 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1112 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1113 if (size == 1) PetscFunctionReturn(0); 1114 1115 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1116 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1117 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1118 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1119 for (i=0; i<first; i++) notme[i] = i; 1120 for (i=last; i<M; i++) notme[i-last+first] = i; 1121 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1122 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1123 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1124 Aoff = Aoffs[0]; 1125 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1126 Boff = Boffs[0]; 1127 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1128 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1129 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1130 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1131 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1132 ierr = PetscFree(notme);CHKERRQ(ierr); 1133 PetscFunctionReturn(0); 1134 } 1135 1136 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1137 { 1138 PetscErrorCode ierr; 1139 1140 PetscFunctionBegin; 1141 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1146 { 1147 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 /* do nondiagonal part */ 1152 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1153 /* send it on its way */ 1154 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1155 /* do local part */ 1156 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1157 /* receive remote parts */ 1158 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 /* 1163 This only works correctly for square matrices where the subblock A->A is the 1164 diagonal block 1165 */ 1166 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1167 { 1168 PetscErrorCode ierr; 1169 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1170 1171 PetscFunctionBegin; 1172 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1173 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1174 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1179 { 1180 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1181 PetscErrorCode ierr; 1182 1183 PetscFunctionBegin; 1184 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1185 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1186 PetscFunctionReturn(0); 1187 } 1188 1189 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1190 { 1191 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1192 PetscErrorCode ierr; 1193 1194 PetscFunctionBegin; 1195 #if defined(PETSC_USE_LOG) 1196 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1197 #endif 1198 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1199 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1200 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1201 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1202 #if defined(PETSC_USE_CTABLE) 1203 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1204 #else 1205 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1206 #endif 1207 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1208 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1209 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1210 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1211 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1212 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1213 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1214 1215 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1224 #if defined(PETSC_HAVE_ELEMENTAL) 1225 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1226 #endif 1227 #if defined(PETSC_HAVE_HYPRE) 1228 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1230 #endif 1231 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1237 { 1238 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1239 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1240 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1241 PetscErrorCode ierr; 1242 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1243 int fd; 1244 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1245 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1246 PetscScalar *column_values; 1247 PetscInt message_count,flowcontrolcount; 1248 FILE *file; 1249 1250 PetscFunctionBegin; 1251 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1252 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1253 nz = A->nz + B->nz; 1254 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1255 if (!rank) { 1256 header[0] = MAT_FILE_CLASSID; 1257 header[1] = mat->rmap->N; 1258 header[2] = mat->cmap->N; 1259 1260 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1261 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1262 /* get largest number of rows any processor has */ 1263 rlen = mat->rmap->n; 1264 range = mat->rmap->range; 1265 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1266 } else { 1267 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1268 rlen = mat->rmap->n; 1269 } 1270 1271 /* 
load up the local row counts */ 1272 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1273 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1274 1275 /* store the row lengths to the file */ 1276 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1277 if (!rank) { 1278 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1279 for (i=1; i<size; i++) { 1280 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1281 rlen = range[i+1] - range[i]; 1282 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1283 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1284 } 1285 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1286 } else { 1287 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1288 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1289 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1290 } 1291 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1292 1293 /* load up the local column indices */ 1294 nzmax = nz; /* th processor needs space a largest processor needs */ 1295 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1296 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1297 cnt = 0; 1298 for (i=0; i<mat->rmap->n; i++) { 1299 for (j=B->i[i]; j<B->i[i+1]; j++) { 1300 if ((col = garray[B->j[j]]) > cstart) break; 1301 column_indices[cnt++] = col; 1302 } 1303 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1304 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1305 } 1306 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1307 1308 /* store the column indices to the file */ 1309 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1310 if (!rank) { 1311 MPI_Status status; 1312 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1313 for (i=1; i<size; i++) { 1314 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1315 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1316 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1317 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1318 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1319 } 1320 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1321 } else { 1322 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1323 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1324 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1325 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1326 } 1327 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1328 1329 /* load up the local column values */ 1330 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1331 cnt = 0; 1332 for (i=0; i<mat->rmap->n; i++) { 1333 for 
(j=B->i[i]; j<B->i[i+1]; j++) { 1334 if (garray[B->j[j]] > cstart) break; 1335 column_values[cnt++] = B->a[j]; 1336 } 1337 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1338 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1339 } 1340 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1341 1342 /* store the column values to the file */ 1343 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1344 if (!rank) { 1345 MPI_Status status; 1346 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1347 for (i=1; i<size; i++) { 1348 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1349 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1350 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1351 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1352 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1353 } 1354 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1355 } else { 1356 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1357 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1358 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1359 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1360 } 1361 ierr = PetscFree(column_values);CHKERRQ(ierr); 1362 1363 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1364 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #include <petscdraw.h> 1369 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1370 { 1371 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1372 PetscErrorCode ierr; 1373 PetscMPIInt rank = aij->rank,size = aij->size; 1374 PetscBool isdraw,iascii,isbinary; 1375 PetscViewer sviewer; 1376 PetscViewerFormat format; 1377 1378 PetscFunctionBegin; 1379 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1380 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1381 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1382 if (iascii) { 1383 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1384 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1385 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1386 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1387 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1388 for (i=0; i<(PetscInt)size; i++) { 1389 nmax = PetscMax(nmax,nz[i]); 1390 nmin = PetscMin(nmin,nz[i]); 1391 navg += nz[i]; 1392 } 1393 ierr = PetscFree(nz);CHKERRQ(ierr); 1394 navg = navg/size; 1395 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1396 PetscFunctionReturn(0); 1397 } 1398 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1399 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1400 MatInfo info; 1401 
PetscBool inodes; 1402 1403 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1404 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1405 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1406 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1407 if (!inodes) { 1408 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1409 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1410 } else { 1411 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1412 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1413 } 1414 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1415 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1416 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1418 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1420 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1421 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1422 PetscFunctionReturn(0); 1423 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1424 PetscInt inodecount,inodelimit,*inodes; 1425 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1426 if (inodes) { 1427 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1428 } else { 1429 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1430 } 1431 PetscFunctionReturn(0); 1432 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1433 PetscFunctionReturn(0); 1434 } 1435 } else if (isbinary) { 1436 if (size == 1) { 1437 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1438 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1439 } else { 1440 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1441 } 1442 PetscFunctionReturn(0); 1443 } else if (isdraw) { 1444 PetscDraw draw; 1445 PetscBool isnull; 1446 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1447 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1448 if (isnull) PetscFunctionReturn(0); 1449 } 1450 1451 { 1452 /* assemble the entire matrix onto first processor. 
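This gathers a full copy of the matrix on rank 0, so it is memory- and communication-intensive; it is only intended for ASCII and draw viewing of small matrices.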
*/ 1453 Mat A; 1454 Mat_SeqAIJ *Aloc; 1455 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1456 MatScalar *a; 1457 1458 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1459 if (!rank) { 1460 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1461 } else { 1462 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1463 } 1464 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1465 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1466 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1467 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1468 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1469 1470 /* copy over the A part */ 1471 Aloc = (Mat_SeqAIJ*)aij->A->data; 1472 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1473 row = mat->rmap->rstart; 1474 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1475 for (i=0; i<m; i++) { 1476 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1477 row++; 1478 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1479 } 1480 aj = Aloc->j; 1481 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1482 1483 /* copy over the B part */ 1484 Aloc = (Mat_SeqAIJ*)aij->B->data; 1485 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1486 row = mat->rmap->rstart; 1487 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1488 ct = cols; 1489 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1490 for (i=0; i<m; i++) { 1491 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1492 row++; 1493 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1494 } 1495 ierr = PetscFree(ct);CHKERRQ(ierr); 1496 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1497 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1498 /* 1499 Everyone has to call to draw the matrix since the graphics waits are 1500 synchronized across all processors that share the PetscDraw object 1501 */ 1502 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1503 if (!rank) { 1504 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1505 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1506 } 1507 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1508 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1509 ierr = MatDestroy(&A);CHKERRQ(ierr); 1510 } 1511 PetscFunctionReturn(0); 1512 } 1513 1514 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1515 { 1516 PetscErrorCode ierr; 1517 PetscBool iascii,isdraw,issocket,isbinary; 1518 1519 PetscFunctionBegin; 1520 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1521 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1522 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1523 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1524 if (iascii || isdraw || isbinary || issocket) { 1525 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1526 } 1527 PetscFunctionReturn(0); 1528 } 1529 1530 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1531 { 1532 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1533 PetscErrorCode ierr; 1534 Vec bb1 = 0; 1535 PetscBool hasop; 1536 
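/* Parallel SOR is realized as process-block Jacobi with (S)SOR on the local diagonal block: each outer iteration scatters the current solution into mat->lvec, folds the off-process coupling into the right-hand side as bb1 = bb - B*lvec, and then runs lits local sweeps of mat->A on bb1. The SOR_EISENSTAT variant is handled separately below. */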
1537 PetscFunctionBegin; 1538 if (flag == SOR_APPLY_UPPER) { 1539 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1544 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1545 } 1546 1547 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1548 if (flag & SOR_ZERO_INITIAL_GUESS) { 1549 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1550 its--; 1551 } 1552 1553 while (its--) { 1554 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1555 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1556 1557 /* update rhs: bb1 = bb - B*x */ 1558 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1559 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1560 1561 /* local sweep */ 1562 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1563 } 1564 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1565 if (flag & SOR_ZERO_INITIAL_GUESS) { 1566 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1567 its--; 1568 } 1569 while (its--) { 1570 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1571 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1572 1573 /* update rhs: bb1 = bb - B*x */ 1574 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1575 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1576 1577 /* local sweep */ 1578 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1579 } 1580 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1581 if (flag & SOR_ZERO_INITIAL_GUESS) { 1582 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1583 its--; 1584 } 1585 while (its--) { 1586 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1587 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1588 1589 /* update rhs: bb1 = bb - B*x */ 1590 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1591 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1592 1593 /* local sweep */ 1594 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1595 } 1596 } else if (flag & SOR_EISENSTAT) { 1597 Vec xx1; 1598 1599 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1600 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1601 1602 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1603 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1604 if (!mat->diag) { 1605 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1606 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1607 } 1608 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1609 if (hasop) { 1610 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1611 } else { 1612 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1613 } 1614 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1615 1616 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1617 1618 /* local sweep */ 1619 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1620 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1621 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1622 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1623 1624 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1625 1626 matin->factorerrortype = mat->A->factorerrortype; 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1631 { 1632 Mat aA,aB,Aperm; 1633 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1634 PetscScalar *aa,*ba; 1635 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1636 PetscSF rowsf,sf; 1637 IS parcolp = NULL; 1638 PetscBool done; 1639 PetscErrorCode ierr; 1640 1641 PetscFunctionBegin; 1642 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1643 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1644 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1645 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1646 1647 /* Invert row permutation to find out where my rows should go */ 1648 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1649 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1650 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1651 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1652 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1653 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1654 1655 /* Invert column permutation to find out where my columns should go */ 1656 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1657 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1658 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1659 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1660 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1661 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1662 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1663 1664 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1665 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1666 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1667 1668 /* Find out where my gcols should go */ 1669 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1670 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1671 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1672 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1673 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1674 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1675 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1676 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1677 1678 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1679 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1680 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1681 for (i=0; i<m; i++) { 1682 PetscInt row = rdest[i],rowner; 1683 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1684 for (j=ai[i]; j<ai[i+1]; j++) { 1685 PetscInt cowner,col = cdest[aj[j]]; 1686 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to 
eliminate this search */ 1687 if (rowner == cowner) dnnz[i]++; 1688 else onnz[i]++; 1689 } 1690 for (j=bi[i]; j<bi[i+1]; j++) { 1691 PetscInt cowner,col = gcdest[bj[j]]; 1692 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1693 if (rowner == cowner) dnnz[i]++; 1694 else onnz[i]++; 1695 } 1696 } 1697 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1698 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1699 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1700 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1701 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1702 1703 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1704 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1705 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1706 for (i=0; i<m; i++) { 1707 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1708 PetscInt j0,rowlen; 1709 rowlen = ai[i+1] - ai[i]; 1710 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1711 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1712 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1713 } 1714 rowlen = bi[i+1] - bi[i]; 1715 for (j0=j=0; j<rowlen; j0=j) { 1716 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1717 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1718 } 1719 } 1720 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1721 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1722 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1723 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1724 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1725 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1726 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1727 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1728 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1729 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1730 *B = Aperm; 1731 PetscFunctionReturn(0); 1732 } 1733 1734 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1735 { 1736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1737 PetscErrorCode ierr; 1738 1739 PetscFunctionBegin; 1740 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1741 if (ghosts) *ghosts = aij->garray; 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1746 { 1747 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1748 Mat A = mat->A,B = mat->B; 1749 PetscErrorCode ierr; 1750 PetscReal isend[5],irecv[5]; 1751 1752 PetscFunctionBegin; 1753 info->block_size = 1.0; 1754 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1755 1756 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1757 isend[3] = info->memory; isend[4] = info->mallocs; 1758 1759 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1760 1761 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1762 isend[3] += info->memory; isend[4] += info->mallocs; 1763 if (flag == MAT_LOCAL) { 1764 info->nz_used = isend[0]; 1765 info->nz_allocated = isend[1]; 1766 info->nz_unneeded = isend[2]; 1767 info->memory = isend[3]; 1768 info->mallocs = 
isend[4]; 1769 } else if (flag == MAT_GLOBAL_MAX) { 1770 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1771 1772 info->nz_used = irecv[0]; 1773 info->nz_allocated = irecv[1]; 1774 info->nz_unneeded = irecv[2]; 1775 info->memory = irecv[3]; 1776 info->mallocs = irecv[4]; 1777 } else if (flag == MAT_GLOBAL_SUM) { 1778 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1779 1780 info->nz_used = irecv[0]; 1781 info->nz_allocated = irecv[1]; 1782 info->nz_unneeded = irecv[2]; 1783 info->memory = irecv[3]; 1784 info->mallocs = irecv[4]; 1785 } 1786 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1787 info->fill_ratio_needed = 0; 1788 info->factor_mallocs = 0; 1789 PetscFunctionReturn(0); 1790 } 1791 1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1793 { 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1795 PetscErrorCode ierr; 1796 1797 PetscFunctionBegin; 1798 switch (op) { 1799 case MAT_NEW_NONZERO_LOCATIONS: 1800 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1801 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1802 case MAT_KEEP_NONZERO_PATTERN: 1803 case MAT_NEW_NONZERO_LOCATION_ERR: 1804 case MAT_USE_INODES: 1805 case MAT_IGNORE_ZERO_ENTRIES: 1806 MatCheckPreallocated(A,1); 1807 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1808 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1809 break; 1810 case MAT_ROW_ORIENTED: 1811 MatCheckPreallocated(A,1); 1812 a->roworiented = flg; 1813 1814 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1815 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1816 break; 1817 case MAT_NEW_DIAGONALS: 1818 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1819 break; 1820 case MAT_IGNORE_OFF_PROC_ENTRIES: 1821 a->donotstash = flg; 1822 break; 1823 case MAT_SPD: 1824 A->spd_set = PETSC_TRUE; 1825 A->spd = flg; 1826 if (flg) { 1827 A->symmetric = PETSC_TRUE; 1828 A->structurally_symmetric = PETSC_TRUE; 1829 A->symmetric_set = PETSC_TRUE; 1830 A->structurally_symmetric_set = PETSC_TRUE; 1831 } 1832 break; 1833 case MAT_SYMMETRIC: 1834 MatCheckPreallocated(A,1); 1835 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1836 break; 1837 case MAT_STRUCTURALLY_SYMMETRIC: 1838 MatCheckPreallocated(A,1); 1839 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1840 break; 1841 case MAT_HERMITIAN: 1842 MatCheckPreallocated(A,1); 1843 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1844 break; 1845 case MAT_SYMMETRY_ETERNAL: 1846 MatCheckPreallocated(A,1); 1847 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1848 break; 1849 case MAT_SUBMAT_SINGLEIS: 1850 A->submat_singleis = flg; 1851 break; 1852 case MAT_STRUCTURE_ONLY: 1853 /* The option is handled directly by MatSetOption() */ 1854 break; 1855 default: 1856 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1857 } 1858 PetscFunctionReturn(0); 1859 } 1860 1861 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1862 { 1863 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1864 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1865 PetscErrorCode ierr; 1866 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1867 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1868 PetscInt *cmap,*idx_p; 1869 1870 PetscFunctionBegin; 1871 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1872 mat->getrowactive = PETSC_TRUE; 1873 1874 if (!mat->rowvalues && 
(idx || v)) { 1875 /* 1876 allocate enough space to hold information from the longest row. 1877 */ 1878 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1879 PetscInt max = 1,tmp; 1880 for (i=0; i<matin->rmap->n; i++) { 1881 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1882 if (max < tmp) max = tmp; 1883 } 1884 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1885 } 1886 1887 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1888 lrow = row - rstart; 1889 1890 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1891 if (!v) {pvA = 0; pvB = 0;} 1892 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1893 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1894 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1895 nztot = nzA + nzB; 1896 1897 cmap = mat->garray; 1898 if (v || idx) { 1899 if (nztot) { 1900 /* Sort by increasing column numbers, assuming A and B already sorted */ 1901 PetscInt imark = -1; 1902 if (v) { 1903 *v = v_p = mat->rowvalues; 1904 for (i=0; i<nzB; i++) { 1905 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1906 else break; 1907 } 1908 imark = i; 1909 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1910 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1911 } 1912 if (idx) { 1913 *idx = idx_p = mat->rowindices; 1914 if (imark > -1) { 1915 for (i=0; i<imark; i++) { 1916 idx_p[i] = cmap[cworkB[i]]; 1917 } 1918 } else { 1919 for (i=0; i<nzB; i++) { 1920 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1921 else break; 1922 } 1923 imark = i; 1924 } 1925 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1926 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1927 } 1928 } else { 1929 if (idx) *idx = 0; 1930 if (v) *v = 0; 1931 } 1932 } 1933 *nz = nztot; 1934 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1935 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1936 PetscFunctionReturn(0); 1937 } 1938 1939 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1940 { 1941 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1942 1943 PetscFunctionBegin; 1944 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1945 aij->getrowactive = PETSC_FALSE; 1946 PetscFunctionReturn(0); 1947 } 1948 1949 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1950 { 1951 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1952 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1953 PetscErrorCode ierr; 1954 PetscInt i,j,cstart = mat->cmap->rstart; 1955 PetscReal sum = 0.0; 1956 MatScalar *v; 1957 1958 PetscFunctionBegin; 1959 if (aij->size == 1) { 1960 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1961 } else { 1962 if (type == NORM_FROBENIUS) { 1963 v = amat->a; 1964 for (i=0; i<amat->nz; i++) { 1965 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1966 } 1967 v = bmat->a; 1968 for (i=0; i<bmat->nz; i++) { 1969 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1970 } 1971 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1972 *norm = PetscSqrtReal(*norm); 1973 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1974 } else if (type == NORM_1) { /* max column norm */ 1975 PetscReal *tmp,*tmp2; 1976 PetscInt *jj,*garray = aij->garray; 1977 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1978 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1979 *norm = 0.0; 1980 v = amat->a; jj = amat->j; 1981 for (j=0; j<amat->nz; j++) { 1982 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1983 } 1984 v = bmat->a; jj = bmat->j; 1985 for (j=0; j<bmat->nz; j++) { 1986 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1987 } 1988 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1989 for (j=0; j<mat->cmap->N; j++) { 1990 if (tmp2[j] > *norm) *norm = tmp2[j]; 1991 } 1992 ierr = PetscFree(tmp);CHKERRQ(ierr); 1993 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1994 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1995 } else if (type == NORM_INFINITY) { /* max row norm */ 1996 PetscReal ntemp = 0.0; 1997 for (j=0; j<aij->A->rmap->n; j++) { 1998 v = amat->a + amat->i[j]; 1999 sum = 0.0; 2000 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2001 sum += PetscAbsScalar(*v); v++; 2002 } 2003 v = bmat->a + bmat->i[j]; 2004 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2005 sum += PetscAbsScalar(*v); v++; 2006 } 2007 if (sum > ntemp) ntemp = sum; 2008 } 2009 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2010 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2011 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2012 } 2013 PetscFunctionReturn(0); 2014 } 2015 2016 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2017 { 2018 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2019 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2020 PetscErrorCode ierr; 2021 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2022 PetscInt cstart = A->cmap->rstart,ncol; 2023 Mat B; 2024 MatScalar *array; 2025 2026 PetscFunctionBegin; 2027 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2028 ai = Aloc->i; aj = Aloc->j; 2029 bi = Bloc->i; bj = Bloc->j; 2030 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2031 PetscInt *d_nnz,*g_nnz,*o_nnz; 2032 PetscSFNode *oloc; 2033 PETSC_UNUSED PetscSF sf; 2034 2035 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2036 /* compute d_nnz for preallocation */ 2037 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2038 for (i=0; i<ai[ma]; i++) { 2039 d_nnz[aj[i]]++; 2040 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2041 } 2042 /* compute local off-diagonal contributions */ 2043 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2044 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2045 /* map those to global */ 2046 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2047 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2048 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2049 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2050 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2051 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2052 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2053 2054 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2055 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2056 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2057 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2058 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2059 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2060 } else { 2061 B = *matout; 2062 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2063 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2064 } 2065 2066 /* copy over the A part */ 2067 array = Aloc->a; 2068 row = A->rmap->rstart; 2069 for (i=0; i<ma; i++) { 2070 ncol = ai[i+1]-ai[i]; 2071 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2072 row++; 2073 array += ncol; aj += ncol; 2074 } 2075 aj = Aloc->j; 2076 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2077 2078 /* copy over the B part */ 2079 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2080 array = Bloc->a; 2081 row = A->rmap->rstart; 2082 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2083 cols_tmp = cols; 2084 for (i=0; i<mb; i++) { 2085 ncol = bi[i+1]-bi[i]; 2086 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2087 row++; 2088 array += ncol; cols_tmp += ncol; 2089 } 2090 ierr = PetscFree(cols);CHKERRQ(ierr); 2091 2092 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2093 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2094 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2095 *matout = B; 2096 } else { 2097 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2098 } 2099 PetscFunctionReturn(0); 2100 } 2101 2102 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2103 { 2104 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2105 Mat a = aij->A,b = aij->B; 2106 PetscErrorCode ierr; 2107 PetscInt s1,s2,s3; 2108 2109 PetscFunctionBegin; 2110 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2111 if (rr) { 2112 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2113 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2114 /* Overlap communication with computation. 
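The scatter of rr into aij->lvec is started here so the diagonal block can be scaled while the ghost values are still in flight; the scatter is completed, and the off-diagonal block right-scaled, further below.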
*/ 2115 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2116 } 2117 if (ll) { 2118 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2119 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2120 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2121 } 2122 /* scale the diagonal block */ 2123 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2124 2125 if (rr) { 2126 /* Do a scatter end and then right scale the off-diagonal block */ 2127 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2128 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2129 } 2130 PetscFunctionReturn(0); 2131 } 2132 2133 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2134 { 2135 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2136 PetscErrorCode ierr; 2137 2138 PetscFunctionBegin; 2139 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2140 PetscFunctionReturn(0); 2141 } 2142 2143 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2144 { 2145 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2146 Mat a,b,c,d; 2147 PetscBool flg; 2148 PetscErrorCode ierr; 2149 2150 PetscFunctionBegin; 2151 a = matA->A; b = matA->B; 2152 c = matB->A; d = matB->B; 2153 2154 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2155 if (flg) { 2156 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2157 } 2158 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2159 PetscFunctionReturn(0); 2160 } 2161 2162 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2163 { 2164 PetscErrorCode ierr; 2165 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2166 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2167 2168 PetscFunctionBegin; 2169 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2170 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2171 /* because of the column compression in the off-processor part of the matrix a->B, 2172 the number of columns in a->B and b->B may be different, hence we cannot call 2173 the MatCopy() directly on the two parts. If need be, we can provide a more 2174 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2175 then copying the submatrices */ 2176 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2177 } else { 2178 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2179 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2180 } 2181 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2182 PetscFunctionReturn(0); 2183 } 2184 2185 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2186 { 2187 PetscErrorCode ierr; 2188 2189 PetscFunctionBegin; 2190 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2191 PetscFunctionReturn(0); 2192 } 2193 2194 /* 2195 Computes the number of nonzeros per row needed for preallocation when X and Y 2196 have different nonzero structure. 
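For example, if row i of X has global columns {0,3,7} and row i of Y has {3,5}, the merged pattern is {0,3,5,7}, so nnz[i] = 4; the loops below compute exactly this union size by merging the two sorted column lists and counting duplicates once.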
2197 */ 2198 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2199 { 2200 PetscInt i,j,k,nzx,nzy; 2201 2202 PetscFunctionBegin; 2203 /* Set the number of nonzeros in the new matrix */ 2204 for (i=0; i<m; i++) { 2205 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2206 nzx = xi[i+1] - xi[i]; 2207 nzy = yi[i+1] - yi[i]; 2208 nnz[i] = 0; 2209 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2210 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2211 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2212 nnz[i]++; 2213 } 2214 for (; k<nzy; k++) nnz[i]++; 2215 } 2216 PetscFunctionReturn(0); 2217 } 2218 2219 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2220 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2221 { 2222 PetscErrorCode ierr; 2223 PetscInt m = Y->rmap->N; 2224 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2225 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2226 2227 PetscFunctionBegin; 2228 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2229 PetscFunctionReturn(0); 2230 } 2231 2232 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2233 { 2234 PetscErrorCode ierr; 2235 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2236 PetscBLASInt bnz,one=1; 2237 Mat_SeqAIJ *x,*y; 2238 2239 PetscFunctionBegin; 2240 if (str == SAME_NONZERO_PATTERN) { 2241 PetscScalar alpha = a; 2242 x = (Mat_SeqAIJ*)xx->A->data; 2243 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2244 y = (Mat_SeqAIJ*)yy->A->data; 2245 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2246 x = (Mat_SeqAIJ*)xx->B->data; 2247 y = (Mat_SeqAIJ*)yy->B->data; 2248 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2249 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2250 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2251 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2252 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2253 } else { 2254 Mat B; 2255 PetscInt *nnz_d,*nnz_o; 2256 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2257 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2258 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2259 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2260 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2261 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2262 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2263 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2264 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2265 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2266 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2267 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2268 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2269 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2270 } 2271 PetscFunctionReturn(0); 2272 } 2273 2274 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2275 2276 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2277 { 2278 #if defined(PETSC_USE_COMPLEX) 2279 PetscErrorCode ierr; 2280 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2281 2282 PetscFunctionBegin; 2283 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2284 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2285 #else 2286 PetscFunctionBegin; 2287 #endif 2288 PetscFunctionReturn(0); 2289 } 2290 2291 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2292 { 2293 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2294 PetscErrorCode ierr; 2295 2296 PetscFunctionBegin; 2297 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2298 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2299 PetscFunctionReturn(0); 2300 } 2301 2302 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2303 { 2304 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2305 PetscErrorCode ierr; 2306 2307 PetscFunctionBegin; 2308 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2309 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2310 PetscFunctionReturn(0); 2311 } 2312 2313 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2314 { 2315 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2316 PetscErrorCode ierr; 2317 PetscInt i,*idxb = 0; 2318 PetscScalar *va,*vb; 2319 Vec vtmp; 2320 2321 PetscFunctionBegin; 2322 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2323 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2324 if (idx) { 2325 for (i=0; i<A->rmap->n; i++) { 2326 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2327 } 2328 } 2329 2330 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2331 if (idx) { 2332 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2333 } 2334 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2335 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2336 2337 for (i=0; i<A->rmap->n; i++) { 2338 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2339 va[i] = vb[i]; 2340 if (idx) idx[i] = a->garray[idxb[i]]; 2341 } 2342 } 2343 2344 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2345 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2346 ierr = PetscFree(idxb);CHKERRQ(ierr); 2347 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2348 PetscFunctionReturn(0); 2349 } 2350 2351 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2352 { 2353 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2354 PetscErrorCode ierr; 2355 PetscInt i,*idxb = 0; 2356 PetscScalar *va,*vb; 2357 Vec vtmp; 2358 2359 PetscFunctionBegin; 2360 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2361 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2362 if (idx) { 2363 for (i=0; i<A->cmap->n; i++) { 2364 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2365 } 2366 } 2367 2368 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2369 if (idx) { 2370 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2371 } 2372 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2373 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2374 2375 for (i=0; i<A->rmap->n; i++) { 2376 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2377 va[i] = vb[i]; 2378 if (idx) idx[i] = a->garray[idxb[i]]; 2379 } 2380 } 2381 2382 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2383 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2384 ierr = PetscFree(idxb);CHKERRQ(ierr); 2385 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2386 PetscFunctionReturn(0); 2387 } 2388 2389 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2390 { 2391 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2392 PetscInt n = A->rmap->n; 2393 PetscInt cstart = A->cmap->rstart; 2394 PetscInt *cmap = mat->garray; 2395 PetscInt *diagIdx, *offdiagIdx; 2396 Vec diagV, offdiagV; 2397 PetscScalar *a, *diagA, *offdiagA; 2398 PetscInt r; 2399 PetscErrorCode ierr; 2400 2401 PetscFunctionBegin; 2402 
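/* Row-wise minimum: compute minima of the diagonal block (mat->A) and the off-diagonal block (mat->B) separately, then keep, for each local row, the candidate with the smaller magnitude; diagonal-block indices are shifted by cstart and off-diagonal indices are mapped to global columns through garray. */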
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2403 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2404 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2405 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2406 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2407 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2408 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2409 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2410 for (r = 0; r < n; ++r) { 2411 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2412 a[r] = diagA[r]; 2413 idx[r] = cstart + diagIdx[r]; 2414 } else { 2415 a[r] = offdiagA[r]; 2416 idx[r] = cmap[offdiagIdx[r]]; 2417 } 2418 } 2419 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2420 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2421 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2422 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2423 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2424 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2425 PetscFunctionReturn(0); 2426 } 2427 2428 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2429 { 2430 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2431 PetscInt n = A->rmap->n; 2432 PetscInt cstart = A->cmap->rstart; 2433 PetscInt *cmap = mat->garray; 2434 PetscInt *diagIdx, *offdiagIdx; 2435 Vec diagV, offdiagV; 2436 PetscScalar *a, *diagA, *offdiagA; 2437 PetscInt r; 2438 PetscErrorCode ierr; 2439 2440 PetscFunctionBegin; 2441 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2442 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2443 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2444 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2445 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2446 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2447 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2448 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2449 for (r = 0; r < n; ++r) { 2450 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2451 a[r] = diagA[r]; 2452 idx[r] = cstart + diagIdx[r]; 2453 } else { 2454 a[r] = offdiagA[r]; 2455 idx[r] = cmap[offdiagIdx[r]]; 2456 } 2457 } 2458 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2459 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2460 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2461 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2462 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2463 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2464 PetscFunctionReturn(0); 2465 } 2466 2467 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2468 { 2469 PetscErrorCode ierr; 2470 Mat *dummy; 2471 2472 PetscFunctionBegin; 2473 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2474 *newmat = *dummy; 2475 ierr = PetscFree(dummy);CHKERRQ(ierr); 2476 PetscFunctionReturn(0); 2477 } 2478 2479 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2480 { 2481 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2482 PetscErrorCode ierr; 2483 2484 PetscFunctionBegin; 2485 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2486 A->factorerrortype = a->A->factorerrortype; 2487 PetscFunctionReturn(0); 2488 } 2489 2490 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2491 { 2492 PetscErrorCode ierr; 2493 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2494 2495 PetscFunctionBegin; 2496
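/* Randomize the diagonal and off-diagonal blocks independently via the sequential MatSetRandom(), then run the usual assembly so the parallel matrix is left in a consistent assembled state. */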
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2497 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2498 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2499 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2500 PetscFunctionReturn(0); 2501 } 2502 2503 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2504 { 2505 PetscFunctionBegin; 2506 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2507 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2508 PetscFunctionReturn(0); 2509 } 2510 2511 /*@ 2512 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2513 2514 Collective on Mat 2515 2516 Input Parameters: 2517 + A - the matrix 2518 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2519 2520 Level: advanced 2521 2522 @*/ 2523 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2524 { 2525 PetscErrorCode ierr; 2526 2527 PetscFunctionBegin; 2528 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2529 PetscFunctionReturn(0); 2530 } 2531 2532 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2533 { 2534 PetscErrorCode ierr; 2535 PetscBool sc = PETSC_FALSE,flg; 2536 2537 PetscFunctionBegin; 2538 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2539 ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr); 2540 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2541 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2542 if (flg) { 2543 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2544 } 2545 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2550 { 2551 PetscErrorCode ierr; 2552 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2553 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2554 2555 PetscFunctionBegin; 2556 if (!Y->preallocated) { 2557 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2558 } else if (!aij->nz) { 2559 PetscInt nonew = aij->nonew; 2560 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2561 aij->nonew = nonew; 2562 } 2563 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2564 PetscFunctionReturn(0); 2565 } 2566 2567 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2568 { 2569 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2570 PetscErrorCode ierr; 2571 2572 PetscFunctionBegin; 2573 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2574 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2575 if (d) { 2576 PetscInt rstart; 2577 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2578 *d += rstart; 2579 2580 } 2581 PetscFunctionReturn(0); 2582 } 2583 2584 2585 /* -------------------------------------------------------------------*/ 2586 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2587 MatGetRow_MPIAIJ, 2588 MatRestoreRow_MPIAIJ, 2589 MatMult_MPIAIJ, 2590 /* 4*/ MatMultAdd_MPIAIJ, 2591 MatMultTranspose_MPIAIJ, 2592 MatMultTransposeAdd_MPIAIJ, 2593 0, 2594 0, 2595 0, 2596 /*10*/ 0, 2597 0, 2598 0, 2599 MatSOR_MPIAIJ, 2600 MatTranspose_MPIAIJ, 2601 /*15*/ MatGetInfo_MPIAIJ, 2602 MatEqual_MPIAIJ, 2603 
MatGetDiagonal_MPIAIJ, 2604 MatDiagonalScale_MPIAIJ, 2605 MatNorm_MPIAIJ, 2606 /*20*/ MatAssemblyBegin_MPIAIJ, 2607 MatAssemblyEnd_MPIAIJ, 2608 MatSetOption_MPIAIJ, 2609 MatZeroEntries_MPIAIJ, 2610 /*24*/ MatZeroRows_MPIAIJ, 2611 0, 2612 0, 2613 0, 2614 0, 2615 /*29*/ MatSetUp_MPIAIJ, 2616 0, 2617 0, 2618 MatGetDiagonalBlock_MPIAIJ, 2619 0, 2620 /*34*/ MatDuplicate_MPIAIJ, 2621 0, 2622 0, 2623 0, 2624 0, 2625 /*39*/ MatAXPY_MPIAIJ, 2626 MatCreateSubMatrices_MPIAIJ, 2627 MatIncreaseOverlap_MPIAIJ, 2628 MatGetValues_MPIAIJ, 2629 MatCopy_MPIAIJ, 2630 /*44*/ MatGetRowMax_MPIAIJ, 2631 MatScale_MPIAIJ, 2632 MatShift_MPIAIJ, 2633 MatDiagonalSet_MPIAIJ, 2634 MatZeroRowsColumns_MPIAIJ, 2635 /*49*/ MatSetRandom_MPIAIJ, 2636 0, 2637 0, 2638 0, 2639 0, 2640 /*54*/ MatFDColoringCreate_MPIXAIJ, 2641 0, 2642 MatSetUnfactored_MPIAIJ, 2643 MatPermute_MPIAIJ, 2644 0, 2645 /*59*/ MatCreateSubMatrix_MPIAIJ, 2646 MatDestroy_MPIAIJ, 2647 MatView_MPIAIJ, 2648 0, 2649 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2650 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2651 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2652 0, 2653 0, 2654 0, 2655 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2656 MatGetRowMinAbs_MPIAIJ, 2657 0, 2658 0, 2659 0, 2660 0, 2661 /*75*/ MatFDColoringApply_AIJ, 2662 MatSetFromOptions_MPIAIJ, 2663 0, 2664 0, 2665 MatFindZeroDiagonals_MPIAIJ, 2666 /*80*/ 0, 2667 0, 2668 0, 2669 /*83*/ MatLoad_MPIAIJ, 2670 MatIsSymmetric_MPIAIJ, 2671 0, 2672 0, 2673 0, 2674 0, 2675 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2676 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2677 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2678 MatPtAP_MPIAIJ_MPIAIJ, 2679 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2680 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2681 0, 2682 0, 2683 0, 2684 0, 2685 /*99*/ 0, 2686 0, 2687 0, 2688 MatConjugate_MPIAIJ, 2689 0, 2690 /*104*/MatSetValuesRow_MPIAIJ, 2691 MatRealPart_MPIAIJ, 2692 MatImaginaryPart_MPIAIJ, 2693 0, 2694 0, 2695 /*109*/0, 2696 0, 2697 MatGetRowMin_MPIAIJ, 2698 0, 2699 MatMissingDiagonal_MPIAIJ, 2700 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2701 0, 2702 MatGetGhosts_MPIAIJ, 2703 0, 2704 0, 2705 /*119*/0, 2706 0, 2707 0, 2708 0, 2709 MatGetMultiProcBlock_MPIAIJ, 2710 /*124*/MatFindNonzeroRows_MPIAIJ, 2711 MatGetColumnNorms_MPIAIJ, 2712 MatInvertBlockDiagonal_MPIAIJ, 2713 0, 2714 MatCreateSubMatricesMPI_MPIAIJ, 2715 /*129*/0, 2716 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2717 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2718 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2719 0, 2720 /*134*/0, 2721 0, 2722 MatRARt_MPIAIJ_MPIAIJ, 2723 0, 2724 0, 2725 /*139*/MatSetBlockSizes_MPIAIJ, 2726 0, 2727 0, 2728 MatFDColoringSetUp_MPIXAIJ, 2729 MatFindOffBlockDiagonalEntries_MPIAIJ, 2730 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2731 }; 2732 2733 /* ----------------------------------------------------------------------------------------*/ 2734 2735 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2736 { 2737 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2738 PetscErrorCode ierr; 2739 2740 PetscFunctionBegin; 2741 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2742 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2743 PetscFunctionReturn(0); 2744 } 2745 2746 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2747 { 2748 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2749 PetscErrorCode ierr; 2750 2751 PetscFunctionBegin; 2752 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2753 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2754 PetscFunctionReturn(0); 2755 } 2756 2757 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt 
o_nnz[]) 2758 { 2759 Mat_MPIAIJ *b; 2760 PetscErrorCode ierr; 2761 2762 PetscFunctionBegin; 2763 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2764 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2765 b = (Mat_MPIAIJ*)B->data; 2766 2767 #if defined(PETSC_USE_CTABLE) 2768 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2769 #else 2770 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2771 #endif 2772 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2773 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2774 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2775 2776 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2777 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2778 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2779 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2780 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2781 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2782 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2783 2784 if (!B->preallocated) { 2785 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2786 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2787 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2788 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2789 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2790 } 2791 2792 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2793 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2794 B->preallocated = PETSC_TRUE; 2795 B->was_assembled = PETSC_FALSE; 2796 B->assembled = PETSC_FALSE;; 2797 PetscFunctionReturn(0); 2798 } 2799 2800 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2801 { 2802 Mat_MPIAIJ *b; 2803 PetscErrorCode ierr; 2804 2805 PetscFunctionBegin; 2806 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2807 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2808 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2809 b = (Mat_MPIAIJ*)B->data; 2810 2811 #if defined(PETSC_USE_CTABLE) 2812 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2813 #else 2814 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2815 #endif 2816 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2817 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2818 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2819 2820 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2821 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2822 B->preallocated = PETSC_TRUE; 2823 B->was_assembled = PETSC_FALSE; 2824 B->assembled = PETSC_FALSE; 2825 PetscFunctionReturn(0); 2826 } 2827 2828 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2829 { 2830 Mat mat; 2831 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2832 PetscErrorCode ierr; 2833 2834 PetscFunctionBegin; 2835 *newmat = 0; 2836 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2837 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2838 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2839 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2840 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2841 a = (Mat_MPIAIJ*)mat->data; 2842 2843 mat->factortype = matin->factortype; 2844 mat->assembled = PETSC_TRUE; 2845 mat->insertmode = NOT_SET_VALUES; 2846 mat->preallocated = PETSC_TRUE; 2847 2848 a->size = oldmat->size; 2849 a->rank = oldmat->rank; 2850 a->donotstash 
= oldmat->donotstash; 2851 a->roworiented = oldmat->roworiented; 2852 a->rowindices = 0; 2853 a->rowvalues = 0; 2854 a->getrowactive = PETSC_FALSE; 2855 2856 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2857 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2858 2859 if (oldmat->colmap) { 2860 #if defined(PETSC_USE_CTABLE) 2861 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2862 #else 2863 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2864 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2865 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2866 #endif 2867 } else a->colmap = 0; 2868 if (oldmat->garray) { 2869 PetscInt len; 2870 len = oldmat->B->cmap->n; 2871 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2872 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2873 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2874 } else a->garray = 0; 2875 2876 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2877 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2878 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2879 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2880 2881 if (oldmat->Mvctx_mpi1) { 2882 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2883 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2884 } 2885 2886 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2887 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2888 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2889 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2890 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2891 *newmat = mat; 2892 PetscFunctionReturn(0); 2893 } 2894 2895 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2896 { 2897 PetscScalar *vals,*svals; 2898 MPI_Comm comm; 2899 PetscErrorCode ierr; 2900 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2901 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2902 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2903 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2904 PetscInt cend,cstart,n,*rowners; 2905 int fd; 2906 PetscInt bs = newMat->rmap->bs; 2907 2908 PetscFunctionBegin; 2909 /* force binary viewer to load .info file if it has not yet done so */ 2910 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2911 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2912 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2913 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2914 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2915 if (!rank) { 2916 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2917 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2918 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2919 } 2920 2921 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2922 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the 
matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2923 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2924 if (bs < 0) bs = 1; 2925 2926 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2927 M = header[1]; N = header[2]; 2928 2929 /* If global sizes are set, check if they are consistent with that given in the file */ 2930 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2931 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2932 2933 /* determine ownership of all (block) rows */ 2934 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2935 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2936 else m = newMat->rmap->n; /* Set by user */ 2937 2938 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2939 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2940 2941 /* First process needs enough room for process with most rows */ 2942 if (!rank) { 2943 mmax = rowners[1]; 2944 for (i=2; i<=size; i++) { 2945 mmax = PetscMax(mmax, rowners[i]); 2946 } 2947 } else mmax = -1; /* unused, but compilers complain */ 2948 2949 rowners[0] = 0; 2950 for (i=2; i<=size; i++) { 2951 rowners[i] += rowners[i-1]; 2952 } 2953 rstart = rowners[rank]; 2954 rend = rowners[rank+1]; 2955 2956 /* distribute row lengths to all processors */ 2957 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2958 if (!rank) { 2959 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2960 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2961 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2962 for (j=0; j<m; j++) { 2963 procsnz[0] += ourlens[j]; 2964 } 2965 for (i=1; i<size; i++) { 2966 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2967 /* calculate the number of nonzeros on each processor */ 2968 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2969 procsnz[i] += rowlengths[j]; 2970 } 2971 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2972 } 2973 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2974 } else { 2975 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2976 } 2977 2978 if (!rank) { 2979 /* determine max buffer needed and allocate it */ 2980 maxnz = 0; 2981 for (i=0; i<size; i++) { 2982 maxnz = PetscMax(maxnz,procsnz[i]); 2983 } 2984 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2985 2986 /* read in my part of the matrix column indices */ 2987 nz = procsnz[0]; 2988 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2989 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2990 2991 /* read in every one elses and ship off */ 2992 for (i=1; i<size; i++) { 2993 nz = procsnz[i]; 2994 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2995 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2996 } 2997 ierr = PetscFree(cols);CHKERRQ(ierr); 2998 } else { 2999 /* determine buffer space needed for message */ 3000 nz = 0; 3001 for (i=0; i<m; i++) { 3002 nz += ourlens[i]; 3003 } 3004 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3005 3006 /* receive message of column indices*/ 3007 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3008 } 3009 3010 /* 
determine column ownership if matrix is not square */ 3011 if (N != M) { 3012 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3013 else n = newMat->cmap->n; 3014 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3015 cstart = cend - n; 3016 } else { 3017 cstart = rstart; 3018 cend = rend; 3019 n = cend - cstart; 3020 } 3021 3022 /* loop over local rows, determining number of off diagonal entries */ 3023 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3024 jj = 0; 3025 for (i=0; i<m; i++) { 3026 for (j=0; j<ourlens[i]; j++) { 3027 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3028 jj++; 3029 } 3030 } 3031 3032 for (i=0; i<m; i++) { 3033 ourlens[i] -= offlens[i]; 3034 } 3035 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3036 3037 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3038 3039 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3040 3041 for (i=0; i<m; i++) { 3042 ourlens[i] += offlens[i]; 3043 } 3044 3045 if (!rank) { 3046 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3047 3048 /* read in my part of the matrix numerical values */ 3049 nz = procsnz[0]; 3050 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3051 3052 /* insert into matrix */ 3053 jj = rstart; 3054 smycols = mycols; 3055 svals = vals; 3056 for (i=0; i<m; i++) { 3057 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3058 smycols += ourlens[i]; 3059 svals += ourlens[i]; 3060 jj++; 3061 } 3062 3063 /* read in other processors and ship out */ 3064 for (i=1; i<size; i++) { 3065 nz = procsnz[i]; 3066 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3067 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3068 } 3069 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3070 } else { 3071 /* receive numeric values */ 3072 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3073 3074 /* receive message of values*/ 3075 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3076 3077 /* insert into matrix */ 3078 jj = rstart; 3079 smycols = mycols; 3080 svals = vals; 3081 for (i=0; i<m; i++) { 3082 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3083 smycols += ourlens[i]; 3084 svals += ourlens[i]; 3085 jj++; 3086 } 3087 } 3088 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3089 ierr = PetscFree(vals);CHKERRQ(ierr); 3090 ierr = PetscFree(mycols);CHKERRQ(ierr); 3091 ierr = PetscFree(rowners);CHKERRQ(ierr); 3092 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3093 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3094 PetscFunctionReturn(0); 3095 } 3096 3097 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3098 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3099 { 3100 PetscErrorCode ierr; 3101 IS iscol_local; 3102 PetscBool isstride; 3103 PetscMPIInt lisstride=0,gisstride; 3104 3105 PetscFunctionBegin; 3106 /* check if we are grabbing all columns*/ 3107 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3108 3109 if (isstride) { 3110 PetscInt start,len,mstart,mlen; 3111 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3112 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3113 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3114 if (mstart == start && mlen-mstart == len) lisstride = 1; 3115 } 3116 3117 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3118 if (gisstride) { 3119 PetscInt N; 3120 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3121 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3122 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3123 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3124 } else { 3125 PetscInt cbs; 3126 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3127 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3128 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3129 } 3130 3131 *isseq = iscol_local; 3132 PetscFunctionReturn(0); 3133 } 3134 3135 /* 3136 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3137 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3138 3139 Input Parameters: 3140 mat - matrix 3141 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3142 i.e., mat->rstart <= isrow[i] < mat->rend 3143 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3144 i.e., mat->cstart <= iscol[i] < mat->cend 3145 Output Parameter: 3146 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3147 iscol_o - sequential column index set for retrieving mat->B 3148 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3149 */ 3150 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3151 { 3152 PetscErrorCode ierr; 3153 Vec x,cmap; 3154 const PetscInt *is_idx; 3155 PetscScalar *xarray,*cmaparray; 3156 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3157 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3158 Mat B=a->B; 3159 Vec lvec=a->lvec,lcmap; 3160 PetscInt i,cstart,cend,Bn=B->cmap->N; 3161 MPI_Comm comm; 3162 VecScatter Mvctx=a->Mvctx; 3163 3164 PetscFunctionBegin; 3165 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3166 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3167 3168 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3169 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3170 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3171 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3172 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3173 3174 /* Get start indices */ 3175 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3176 isstart -= ncols; 3177 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3178 3179 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3180 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3181 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3182 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3183 for (i=0; i<ncols; i++) { 3184 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3185 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3186 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3187 } 3188 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3189 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3190 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3191 3192 /* Get iscol_d */ 3193 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3194 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3195 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3196 3197 /* Get isrow_d */ 3198 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3199 rstart = mat->rmap->rstart; 3200 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3201 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3202 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3203 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3204 3205 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3206 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3207 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3208 3209 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3210 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3211 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3212 3213 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3214 3215 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3216 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3217 3218 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3219 /* off-process column indices */ 3220 count = 0; 3221 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3222 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3223 3224 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3225 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3226 for (i=0; i<Bn; i++) { 3227 if (PetscRealPart(xarray[i]) > -1.0) { 3228 idx[count] = i; /* local column index in off-diagonal part B */ 3229 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3230 count++; 3231 } 3232 } 3233 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3234 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3235 3236 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3237 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3238 3239 ierr = PetscFree(idx);CHKERRQ(ierr); 3240 *garray = cmap1; 3241 3242 ierr = VecDestroy(&x);CHKERRQ(ierr); 3243 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3244 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3245 PetscFunctionReturn(0); 3246 } 3247 3248 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3249 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3250 { 3251 PetscErrorCode ierr; 3252 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3253 Mat M = NULL; 3254 MPI_Comm comm; 3255 IS iscol_d,isrow_d,iscol_o; 3256 Mat Asub = NULL,Bsub = NULL; 3257 PetscInt n; 3258 3259 PetscFunctionBegin; 3260 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3261 3262 if (call == MAT_REUSE_MATRIX) { 3263 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3264 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3265 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3266 3267 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3268 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3269 3270 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3271 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3272 3273 /* Update diagonal and off-diagonal portions of submat */ 3274 asub = (Mat_MPIAIJ*)(*submat)->data; 3275 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3276 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3277 if (n) { 3278 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3279 } 3280 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3281 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3282 3283 } else { /* call == MAT_INITIAL_MATRIX) */ 3284 const PetscInt *garray; 3285 PetscInt BsubN; 3286 3287 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3288 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3289 3290 /* Create local submatrices Asub and Bsub */ 3291 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3292 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3293 3294 /* Create submatrix M */ 3295 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3296 3297 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3298 asub = (Mat_MPIAIJ*)M->data; 3299 3300 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3301 n = asub->B->cmap->N; 3302 if (BsubN > n) { 3303 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3304 const PetscInt *idx; 3305 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3306 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3307 3308 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3309 j = 0; 3310 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3311 for (i=0; i<n; i++) { 3312 if (j >= BsubN) break; 3313 while (subgarray[i] > garray[j]) j++; 3314 3315 if (subgarray[i] == garray[j]) { 3316 idx_new[i] = idx[j++]; 3317 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3318 } 3319 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3320 3321 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3322 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3323 3324 } else if (BsubN < n) { 3325 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3326 } 3327 3328 ierr = PetscFree(garray);CHKERRQ(ierr); 3329 *submat = M; 3330 3331 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3332 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3333 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3334 3335 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3336 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3337 3338 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3339 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3340 } 3341 PetscFunctionReturn(0); 3342 } 3343 3344 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3345 { 3346 PetscErrorCode ierr; 3347 IS iscol_local=NULL,isrow_d; 3348 PetscInt csize; 3349 PetscInt n,i,j,start,end; 3350 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3351 MPI_Comm comm; 3352 3353 PetscFunctionBegin; 3354 /* If isrow has same processor distribution as mat, 3355 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3356 if (call == MAT_REUSE_MATRIX) { 3357 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3358 if (isrow_d) { 3359 sameRowDist = PETSC_TRUE; 3360 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3361 } else { 3362 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3363 if (iscol_local) { 3364 sameRowDist = PETSC_TRUE; 3365 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3366 } 3367 } 3368 } else { 3369 /* Check if isrow has same processor distribution as mat */ 3370 sameDist[0] 
= PETSC_FALSE; 3371 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3372 if (!n) { 3373 sameDist[0] = PETSC_TRUE; 3374 } else { 3375 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3376 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3377 if (i >= start && j < end) { 3378 sameDist[0] = PETSC_TRUE; 3379 } 3380 } 3381 3382 /* Check if iscol has same processor distribution as mat */ 3383 sameDist[1] = PETSC_FALSE; 3384 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3385 if (!n) { 3386 sameDist[1] = PETSC_TRUE; 3387 } else { 3388 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3389 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3390 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3391 } 3392 3393 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3394 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3395 sameRowDist = tsameDist[0]; 3396 } 3397 3398 if (sameRowDist) { 3399 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3400 /* isrow and iscol have same processor distribution as mat */ 3401 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3402 PetscFunctionReturn(0); 3403 } else { /* sameRowDist */ 3404 /* isrow has same processor distribution as mat */ 3405 if (call == MAT_INITIAL_MATRIX) { 3406 PetscBool sorted; 3407 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3408 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3409 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3410 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3411 3412 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3413 if (sorted) { 3414 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3415 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3416 PetscFunctionReturn(0); 3417 } 3418 } else { /* call == MAT_REUSE_MATRIX */ 3419 IS iscol_sub; 3420 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3421 if (iscol_sub) { 3422 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3423 PetscFunctionReturn(0); 3424 } 3425 } 3426 } 3427 } 3428 3429 /* General case: iscol -> iscol_local which has global size of iscol */ 3430 if (call == MAT_REUSE_MATRIX) { 3431 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3432 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3433 } else { 3434 if (!iscol_local) { 3435 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3436 } 3437 } 3438 3439 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3440 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3441 3442 if (call == MAT_INITIAL_MATRIX) { 3443 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3444 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3445 } 3446 PetscFunctionReturn(0); 3447 } 3448 3449 /*@C 3450 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3451 and "off-diagonal" part of the matrix in CSR format. 3452 3453 Collective on MPI_Comm 3454 3455 Input Parameters: 3456 + comm - MPI communicator 3457 . 
A - "diagonal" portion of matrix 3458 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3459 - garray - global index of B columns 3460 3461 Output Parameter: 3462 . mat - the matrix, with input A as its local diagonal matrix 3463 Level: advanced 3464 3465 Notes: 3466 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3467 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3468 3469 .seealso: MatCreateMPIAIJWithSplitArrays() 3470 @*/ 3471 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3472 { 3473 PetscErrorCode ierr; 3474 Mat_MPIAIJ *maij; 3475 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3476 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3477 PetscScalar *oa=b->a; 3478 Mat Bnew; 3479 PetscInt m,n,N; 3480 3481 PetscFunctionBegin; 3482 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3483 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3484 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3485 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3486 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3487 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3488 3489 /* Get global columns of mat */ 3490 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3491 3492 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3493 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3494 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3495 maij = (Mat_MPIAIJ*)(*mat)->data; 3496 3497 (*mat)->preallocated = PETSC_TRUE; 3498 3499 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3500 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3501 3502 /* Set A as diagonal portion of *mat */ 3503 maij->A = A; 3504 3505 nz = oi[m]; 3506 for (i=0; i<nz; i++) { 3507 col = oj[i]; 3508 oj[i] = garray[col]; 3509 } 3510 3511 /* Set Bnew as off-diagonal portion of *mat */ 3512 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3513 bnew = (Mat_SeqAIJ*)Bnew->data; 3514 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3515 maij->B = Bnew; 3516 3517 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3518 3519 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3520 b->free_a = PETSC_FALSE; 3521 b->free_ij = PETSC_FALSE; 3522 ierr = MatDestroy(&B);CHKERRQ(ierr); 3523 3524 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3525 bnew->free_a = PETSC_TRUE; 3526 bnew->free_ij = PETSC_TRUE; 3527 3528 /* condense columns of maij->B */ 3529 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3530 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3531 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3532 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3533 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3534 PetscFunctionReturn(0); 3535 } 3536 3537 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3538 
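/*
   Illustrative sketch (not part of the PETSc API; all names below are hypothetical) of how a caller
   could use MatCreateMPIAIJWithSeqAIJ() above: assemble the local "diagonal" block Asub and the
   column-compressed "off-diagonal" block Bsub as SeqAIJ matrices, build garray[] mapping each column
   of Bsub to its global column index, and hand all three over. Both Asub and Bsub become part of the
   new matrix (B is destroyed internally), so they must not be used afterwards.

      Mat       Asub,Bsub,C;
      PetscInt  *garray;                          garray[k] = global column index of Bsub column k

      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,nlocal,0,dnnz,&Asub);CHKERRQ(ierr);
      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,nghost,0,onnz,&Bsub);CHKERRQ(ierr);
      ... fill Asub and Bsub with MatSetValues() and assemble them ...
      ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&C);CHKERRQ(ierr);
*/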
3539 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3540 { 3541 PetscErrorCode ierr; 3542 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3543 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3544 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3545 Mat M,Msub,B=a->B; 3546 MatScalar *aa; 3547 Mat_SeqAIJ *aij; 3548 PetscInt *garray = a->garray,*colsub,Ncols; 3549 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3550 IS iscol_sub,iscmap; 3551 const PetscInt *is_idx,*cmap; 3552 PetscBool allcolumns=PETSC_FALSE; 3553 MPI_Comm comm; 3554 3555 PetscFunctionBegin; 3556 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3557 3558 if (call == MAT_REUSE_MATRIX) { 3559 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3560 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3561 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3562 3563 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3564 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3565 3566 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3567 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3568 3569 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3570 3571 } else { /* call == MAT_INITIAL_MATRIX) */ 3572 PetscBool flg; 3573 3574 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3575 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3576 3577 /* (1) iscol -> nonscalable iscol_local */ 3578 /* Check for special case: each processor gets entire matrix columns */ 3579 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3580 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3581 if (allcolumns) { 3582 iscol_sub = iscol_local; 3583 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3584 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3585 3586 } else { 3587 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3588 PetscInt *idx,*cmap1,k; 3589 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3590 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3591 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3592 count = 0; 3593 k = 0; 3594 for (i=0; i<Ncols; i++) { 3595 j = is_idx[i]; 3596 if (j >= cstart && j < cend) { 3597 /* diagonal part of mat */ 3598 idx[count] = j; 3599 cmap1[count++] = i; /* column index in submat */ 3600 } else if (Bn) { 3601 /* off-diagonal part of mat */ 3602 if (j == garray[k]) { 3603 idx[count] = j; 3604 cmap1[count++] = i; /* column index in submat */ 3605 } else if (j > garray[k]) { 3606 while (j > garray[k] && k < Bn-1) k++; 3607 if (j == garray[k]) { 3608 idx[count] = j; 3609 cmap1[count++] = i; /* column index in submat */ 3610 } 3611 } 3612 } 3613 } 3614 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3615 3616 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3617 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3618 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3619 3620 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3621 } 3622 3623 /* (3) Create sequential Msub */ 3624 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3625 } 3626 3627 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3628 aij = (Mat_SeqAIJ*)(Msub)->data; 3629 ii = aij->i; 3630 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3631 3632 /* 3633 m - number of local rows 3634 Ncols - number of columns (same on all processors) 3635 rstart - first row in new global matrix generated 3636 */ 3637 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3638 3639 if (call == MAT_INITIAL_MATRIX) { 3640 /* (4) Create parallel newmat */ 3641 PetscMPIInt rank,size; 3642 PetscInt csize; 3643 3644 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3645 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3646 3647 /* 3648 Determine the number of non-zeros in the diagonal and off-diagonal 3649 portions of the matrix in order to do correct preallocation 3650 */ 3651 3652 /* first get start and end of "diagonal" columns */ 3653 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3654 if (csize == PETSC_DECIDE) { 3655 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3656 if (mglobal == Ncols) { /* square matrix */ 3657 nlocal = m; 3658 } else { 3659 nlocal = Ncols/size + ((Ncols % size) > rank); 3660 } 3661 } else { 3662 nlocal = csize; 3663 } 3664 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3665 rstart = rend - nlocal; 3666 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3667 3668 /* next, compute all the lengths */ 3669 jj = aij->j; 3670 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3671 olens = dlens + m; 3672 for (i=0; i<m; i++) { 3673 jend = ii[i+1] - ii[i]; 3674 olen = 0; 3675 dlen = 0; 3676 for (j=0; j<jend; j++) { 3677 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3678 else dlen++; 3679 jj++; 3680 } 3681 olens[i] = olen; 3682 dlens[i] = dlen; 3683 } 3684 3685 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3686 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3687 3688 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3689 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
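/* inherit the block sizes of isrow/iscol, keep the parent matrix type, and preallocate using the diagonal/off-diagonal row lengths computed above */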
3690 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3691 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3692 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3693 ierr = PetscFree(dlens);CHKERRQ(ierr); 3694 3695 } else { /* call == MAT_REUSE_MATRIX */ 3696 M = *newmat; 3697 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3698 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3699 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3700 /* 3701 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3702 rather than the slower MatSetValues(). 3703 */ 3704 M->was_assembled = PETSC_TRUE; 3705 M->assembled = PETSC_FALSE; 3706 } 3707 3708 /* (5) Set values of Msub to *newmat */ 3709 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3710 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3711 3712 jj = aij->j; 3713 aa = aij->a; 3714 for (i=0; i<m; i++) { 3715 row = rstart + i; 3716 nz = ii[i+1] - ii[i]; 3717 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3718 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3719 jj += nz; aa += nz; 3720 } 3721 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3722 3723 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3724 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3725 3726 ierr = PetscFree(colsub);CHKERRQ(ierr); 3727 3728 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3729 if (call == MAT_INITIAL_MATRIX) { 3730 *newmat = M; 3731 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3732 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3733 3734 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3735 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3736 3737 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3738 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3739 3740 if (iscol_local) { 3741 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3742 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3743 } 3744 } 3745 PetscFunctionReturn(0); 3746 } 3747 3748 /* 3749 Not great since it makes two copies of the submatrix, first an SeqAIJ 3750 in local and then by concatenating the local matrices the end result. 3751 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3752 3753 Note: This requires a sequential iscol with all indices. 
3754 */ 3755 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3756 { 3757 PetscErrorCode ierr; 3758 PetscMPIInt rank,size; 3759 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3760 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3761 Mat M,Mreuse; 3762 MatScalar *aa,*vwork; 3763 MPI_Comm comm; 3764 Mat_SeqAIJ *aij; 3765 PetscBool colflag,allcolumns=PETSC_FALSE; 3766 3767 PetscFunctionBegin; 3768 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3769 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3770 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3771 3772 /* Check for special case: each processor gets entire matrix columns */ 3773 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3774 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3775 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3776 3777 if (call == MAT_REUSE_MATRIX) { 3778 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3779 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3780 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3781 } else { 3782 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3783 } 3784 3785 /* 3786 m - number of local rows 3787 n - number of columns (same on all processors) 3788 rstart - first row in new global matrix generated 3789 */ 3790 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3791 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3792 if (call == MAT_INITIAL_MATRIX) { 3793 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3794 ii = aij->i; 3795 jj = aij->j; 3796 3797 /* 3798 Determine the number of non-zeros in the diagonal and off-diagonal 3799 portions of the matrix in order to do correct preallocation 3800 */ 3801 3802 /* first get start and end of "diagonal" columns */ 3803 if (csize == PETSC_DECIDE) { 3804 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3805 if (mglobal == n) { /* square matrix */ 3806 nlocal = m; 3807 } else { 3808 nlocal = n/size + ((n % size) > rank); 3809 } 3810 } else { 3811 nlocal = csize; 3812 } 3813 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3814 rstart = rend - nlocal; 3815 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3816 3817 /* next, compute all the lengths */ 3818 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3819 olens = dlens + m; 3820 for (i=0; i<m; i++) { 3821 jend = ii[i+1] - ii[i]; 3822 olen = 0; 3823 dlen = 0; 3824 for (j=0; j<jend; j++) { 3825 if (*jj < rstart || *jj >= rend) olen++; 3826 else dlen++; 3827 jj++; 3828 } 3829 olens[i] = olen; 3830 dlens[i] = dlen; 3831 } 3832 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3833 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3834 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3835 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3836 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3837 ierr = PetscFree(dlens);CHKERRQ(ierr); 3838 } else { 3839 PetscInt ml,nl; 3840 3841 M = *newmat; 3842 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3843 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3844 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3845 /* 3846 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3847 rather than the slower MatSetValues(). 3848 */ 3849 M->was_assembled = PETSC_TRUE; 3850 M->assembled = PETSC_FALSE; 3851 } 3852 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3853 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3854 ii = aij->i; 3855 jj = aij->j; 3856 aa = aij->a; 3857 for (i=0; i<m; i++) { 3858 row = rstart + i; 3859 nz = ii[i+1] - ii[i]; 3860 cwork = jj; jj += nz; 3861 vwork = aa; aa += nz; 3862 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3863 } 3864 3865 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3866 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3867 *newmat = M; 3868 3869 /* save submatrix used in processor for next request */ 3870 if (call == MAT_INITIAL_MATRIX) { 3871 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3872 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3873 } 3874 PetscFunctionReturn(0); 3875 } 3876 3877 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3878 { 3879 PetscInt m,cstart, cend,j,nnz,i,d; 3880 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3881 const PetscInt *JJ; 3882 PetscScalar *values; 3883 PetscErrorCode ierr; 3884 PetscBool nooffprocentries; 3885 3886 PetscFunctionBegin; 3887 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3888 3889 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3890 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3891 m = B->rmap->n; 3892 cstart = B->cmap->rstart; 3893 cend = B->cmap->rend; 3894 rstart = B->rmap->rstart; 3895 3896 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3897 3898 #if defined(PETSC_USE_DEBUG) 3899 for (i=0; i<m; i++) { 3900 nnz = Ii[i+1]- Ii[i]; 3901 JJ = J + Ii[i]; 3902 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3903 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3904 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3905 } 3906 #endif 3907 3908 for (i=0; i<m; i++) { 3909 nnz = Ii[i+1]- Ii[i]; 3910 JJ = J + Ii[i]; 3911 nnz_max = PetscMax(nnz_max,nnz); 3912 d = 0; 3913 for (j=0; j<nnz; j++) { 3914 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3915 } 3916 d_nnz[i] = d; 3917 o_nnz[i] = nnz - d; 3918 } 3919 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3920 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3921 3922 if (v) values = (PetscScalar*)v; 3923 else { 3924 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3925 } 3926 3927 for (i=0; i<m; i++) { 3928 ii = i + rstart; 3929 nnz = Ii[i+1]- Ii[i]; 3930 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3931 } 3932 nooffprocentries = B->nooffprocentries; 3933 B->nooffprocentries = PETSC_TRUE; 3934 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3935 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3936 B->nooffprocentries = nooffprocentries; 3937 3938 if (!v) { 3939 ierr = PetscFree(values);CHKERRQ(ierr); 3940 } 3941 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3942 PetscFunctionReturn(0); 3943 } 3944 3945 /*@ 3946 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3947 (the default parallel PETSc format). 3948 3949 Collective on MPI_Comm 3950 3951 Input Parameters: 3952 + B - the matrix 3953 . i - the indices into j for the start of each local row (starts with zero) 3954 . j - the column indices for each local row (starts with zero) 3955 - v - optional values in the matrix 3956 3957 Level: developer 3958 3959 Notes: 3960 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3961 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3962 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3963 3964 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3965 3966 The format which is used for the sparse matrix input, is equivalent to a 3967 row-major ordering.. i.e for the following matrix, the input data expected is 3968 as shown 3969 3970 $ 1 0 0 3971 $ 2 0 3 P0 3972 $ ------- 3973 $ 4 5 6 P1 3974 $ 3975 $ Process0 [P0]: rows_owned=[0,1] 3976 $ i = {0,1,3} [size = nrow+1 = 2+1] 3977 $ j = {0,0,2} [size = 3] 3978 $ v = {1,2,3} [size = 3] 3979 $ 3980 $ Process1 [P1]: rows_owned=[2] 3981 $ i = {0,3} [size = nrow+1 = 1+1] 3982 $ j = {0,1,2} [size = 3] 3983 $ v = {4,5,6} [size = 3] 3984 3985 .keywords: matrix, aij, compressed row, sparse, parallel 3986 3987 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3988 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3989 @*/ 3990 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3991 { 3992 PetscErrorCode ierr; 3993 3994 PetscFunctionBegin; 3995 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3996 PetscFunctionReturn(0); 3997 } 3998 3999 /*@C 4000 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4001 (the default parallel PETSc format). For good matrix assembly performance 4002 the user should preallocate the matrix storage by setting the parameters 4003 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4004 performance can be increased by more than a factor of 50. 4005 4006 Collective on MPI_Comm 4007 4008 Input Parameters: 4009 + B - the matrix 4010 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4011 (same value is used for all local rows) 4012 . d_nnz - array containing the number of nonzeros in the various rows of the 4013 DIAGONAL portion of the local submatrix (possibly different for each row) 4014 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4015 The size of this array is equal to the number of local rows, i.e 'm'. 
4016 For matrices that will be factored, you must leave room for (and set) 4017 the diagonal entry even if it is zero. 4018 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4019 submatrix (same value is used for all local rows). 4020 - o_nnz - array containing the number of nonzeros in the various rows of the 4021 OFF-DIAGONAL portion of the local submatrix (possibly different for 4022 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4023 structure. The size of this array is equal to the number 4024 of local rows, i.e., 'm'. 4025 4026 If the *_nnz parameter is given then the *_nz parameter is ignored. 4027 4028 The AIJ format (also called the Yale sparse matrix format or 4029 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4030 storage. The stored row and column indices begin with zero. 4031 See Users-Manual: ch_mat for details. 4032 4033 The parallel matrix is partitioned such that the first m0 rows belong to 4034 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4035 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 4036 4037 The DIAGONAL portion of the local submatrix of a processor can be defined 4038 as the submatrix obtained by extracting the part corresponding to 4039 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4040 first row that belongs to the processor, r2 is the last row belonging to 4041 this processor, and c1-c2 is the range of indices of the local part of a 4042 vector suitable for applying the matrix to. This is an m x n matrix. In the 4043 common case of a square matrix, the row and column ranges are the same and 4044 the DIAGONAL part is also square. The remaining portion of the local 4045 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 4046 4047 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 4048 4049 You can call MatGetInfo() to get information on how effective the preallocation was; 4050 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 4051 You can also run with the option -info and look for messages with the string 4052 malloc in them to see if additional memory allocation was needed. 4053 4054 Example usage: 4055 4056 Consider the following 8x8 matrix with 34 non-zero values, that is 4057 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4058 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4059 as follows: 4060 4061 .vb 4062 1 2 0 | 0 3 0 | 0 4 4063 Proc0 0 5 6 | 7 0 0 | 8 0 4064 9 0 10 | 11 0 0 | 12 0 4065 ------------------------------------- 4066 13 0 14 | 15 16 17 | 0 0 4067 Proc1 0 18 0 | 19 20 21 | 0 0 4068 0 0 0 | 22 23 0 | 24 0 4069 ------------------------------------- 4070 Proc2 25 26 27 | 0 0 28 | 29 0 4071 30 0 0 | 31 32 33 | 0 34 4072 .ve 4073 4074 This can be represented as a collection of submatrices as: 4075 4076 .vb 4077 A B C 4078 D E F 4079 G H I 4080 .ve 4081 4082 where the submatrices A,B,C are owned by proc0, D,E,F are 4083 owned by proc1, and G,H,I are owned by proc2. 4084 4085 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4086 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4087 The 'M','N' parameters are 8,8, and have the same values on all procs. 4088 4089 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4090 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4091 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4092 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4093 part as SeqAIJ matrices; for example, proc1 will store [E] as one SeqAIJ 4094 matrix and [DF] as another SeqAIJ matrix. 4095 4096 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4097 allocated for every row of the local diagonal submatrix, and o_nz 4098 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4099 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local 4100 rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4101 In this case, the values of d_nz,o_nz are: 4102 .vb 4103 proc0 : d_nz = 2, o_nz = 2 4104 proc1 : d_nz = 3, o_nz = 2 4105 proc2 : d_nz = 1, o_nz = 4 4106 .ve 4107 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4108 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4109 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4110 34 values. 4111 4112 When the d_nnz, o_nnz parameters are specified, the storage is specified 4113 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4114 In the above case the values for d_nnz,o_nnz are: 4115 .vb 4116 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4117 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4118 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4119 .ve 4120 Here the space allocated is the sum of all the above values, i.e., 34, and 4121 hence the preallocation is perfect. 4122 4123 Level: intermediate 4124 4125 .keywords: matrix, aij, compressed row, sparse, parallel 4126 4127 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4128 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4129 @*/ 4130 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4131 { 4132 PetscErrorCode ierr; 4133 4134 PetscFunctionBegin; 4135 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4136 PetscValidType(B,1); 4137 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4138 PetscFunctionReturn(0); 4139 } 4140 4141 /*@ 4142 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in 4143 standard CSR format. 4144 4145 Collective on MPI_Comm 4146 4147 Input Parameters: 4148 + comm - MPI communicator 4149 . m - number of local rows (Cannot be PETSC_DECIDE) 4150 . n - This value should be the same as the local size used in creating the 4151 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4152 calculated if N is given) For square matrices n is almost always m. 4153 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4154 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4155 . i - row indices 4156 . j - column indices 4157 - a - matrix values 4158 4159 Output Parameter: 4160 . mat - the matrix 4161 4162 Level: intermediate 4163 4164 Notes: 4165 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4166 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4167 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4168 4169 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
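   As a sketch (the variable names here are only illustrative), a typical call on each process, with nlocal local rows and the global column count N known, looks like

$     ierr = MatCreateMPIAIJWithArrays(comm,nlocal,PETSC_DECIDE,PETSC_DETERMINE,N,i,j,a,&A);CHKERRQ(ierr);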
4170 4171 The format which is used for the sparse matrix input, is equivalent to a 4172 row-major ordering.. i.e for the following matrix, the input data expected is 4173 as shown 4174 4175 $ 1 0 0 4176 $ 2 0 3 P0 4177 $ ------- 4178 $ 4 5 6 P1 4179 $ 4180 $ Process0 [P0]: rows_owned=[0,1] 4181 $ i = {0,1,3} [size = nrow+1 = 2+1] 4182 $ j = {0,0,2} [size = 3] 4183 $ v = {1,2,3} [size = 3] 4184 $ 4185 $ Process1 [P1]: rows_owned=[2] 4186 $ i = {0,3} [size = nrow+1 = 1+1] 4187 $ j = {0,1,2} [size = 3] 4188 $ v = {4,5,6} [size = 3] 4189 4190 .keywords: matrix, aij, compressed row, sparse, parallel 4191 4192 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4193 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4194 @*/ 4195 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4196 { 4197 PetscErrorCode ierr; 4198 4199 PetscFunctionBegin; 4200 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4201 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4202 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4203 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4204 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4205 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4206 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4207 PetscFunctionReturn(0); 4208 } 4209 4210 /*@C 4211 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4212 (the default parallel PETSc format). For good matrix assembly performance 4213 the user should preallocate the matrix storage by setting the parameters 4214 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4215 performance can be increased by more than a factor of 50. 4216 4217 Collective on MPI_Comm 4218 4219 Input Parameters: 4220 + comm - MPI communicator 4221 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4222 This value should be the same as the local size used in creating the 4223 y vector for the matrix-vector product y = Ax. 4224 . n - This value should be the same as the local size used in creating the 4225 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4226 calculated if N is given) For square matrices n is almost always m. 4227 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4228 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4229 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4230 (same value is used for all local rows) 4231 . d_nnz - array containing the number of nonzeros in the various rows of the 4232 DIAGONAL portion of the local submatrix (possibly different for each row) 4233 or NULL, if d_nz is used to specify the nonzero structure. 4234 The size of this array is equal to the number of local rows, i.e 'm'. 4235 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4236 submatrix (same value is used for all local rows). 4237 - o_nnz - array containing the number of nonzeros in the various rows of the 4238 OFF-DIAGONAL portion of the local submatrix (possibly different for 4239 each row) or NULL, if o_nz is used to specify the nonzero 4240 structure. 
The size of this array is equal to the number of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal submatrix on
   process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A, m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
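
   As a rough illustration of the calling sequence for the example above (a minimal
   sketch only; error checking with CHKERRQ() is omitted and the MatSetValues() loop
   is elided), proc0 could create its part of the matrix as follows; proc1 and proc2
   would make the analogous call with their own m, n, d_nnz, and o_nnz:

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2};   /* nonzeros per local row in the DIAGONAL block [A]      */
     PetscInt o_nnz[3] = {2,2,2};   /* nonzeros per local row in the OFF-DIAGONAL block [BC] */

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     /* ... insert the locally owned rows with MatSetValues() ... */
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve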
4379 4380 Level: intermediate 4381 4382 .keywords: matrix, aij, compressed row, sparse, parallel 4383 4384 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4385 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4386 @*/ 4387 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4388 { 4389 PetscErrorCode ierr; 4390 PetscMPIInt size; 4391 4392 PetscFunctionBegin; 4393 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4394 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4395 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4396 if (size > 1) { 4397 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4398 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4399 } else { 4400 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4401 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4402 } 4403 PetscFunctionReturn(0); 4404 } 4405 4406 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4407 { 4408 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4409 PetscBool flg; 4410 PetscErrorCode ierr; 4411 4412 PetscFunctionBegin; 4413 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4414 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4415 if (Ad) *Ad = a->A; 4416 if (Ao) *Ao = a->B; 4417 if (colmap) *colmap = a->garray; 4418 PetscFunctionReturn(0); 4419 } 4420 4421 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4422 { 4423 PetscErrorCode ierr; 4424 PetscInt m,N,i,rstart,nnz,Ii; 4425 PetscInt *indx; 4426 PetscScalar *values; 4427 4428 PetscFunctionBegin; 4429 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4430 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4431 PetscInt *dnz,*onz,sum,bs,cbs; 4432 4433 if (n == PETSC_DECIDE) { 4434 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4435 } 4436 /* Check sum(n) = N */ 4437 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4438 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4439 4440 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4441 rstart -= m; 4442 4443 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4444 for (i=0; i<m; i++) { 4445 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4446 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4447 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4448 } 4449 4450 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4451 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4452 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4453 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4454 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4455 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4456 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4457 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4458 } 4459 4460 /* numeric phase */ 4461 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4462 for (i=0; i<m; i++) { 4463 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4464 Ii = i + rstart; 4465 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
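    /* restore the row obtained with MatGetRow_SeqAIJ() above before fetching the next one */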
4466 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4467 } 4468 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4469 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4470 PetscFunctionReturn(0); 4471 } 4472 4473 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4474 { 4475 PetscErrorCode ierr; 4476 PetscMPIInt rank; 4477 PetscInt m,N,i,rstart,nnz; 4478 size_t len; 4479 const PetscInt *indx; 4480 PetscViewer out; 4481 char *name; 4482 Mat B; 4483 const PetscScalar *values; 4484 4485 PetscFunctionBegin; 4486 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4487 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4488 /* Should this be the type of the diagonal block of A? */ 4489 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4490 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4491 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4492 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4493 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4494 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4495 for (i=0; i<m; i++) { 4496 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4497 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4498 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4499 } 4500 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4501 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4502 4503 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4504 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4505 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4506 sprintf(name,"%s.%d",outfile,rank); 4507 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4508 ierr = PetscFree(name);CHKERRQ(ierr); 4509 ierr = MatView(B,out);CHKERRQ(ierr); 4510 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4511 ierr = MatDestroy(&B);CHKERRQ(ierr); 4512 PetscFunctionReturn(0); 4513 } 4514 4515 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4516 { 4517 PetscErrorCode ierr; 4518 Mat_Merge_SeqsToMPI *merge; 4519 PetscContainer container; 4520 4521 PetscFunctionBegin; 4522 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4523 if (container) { 4524 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4525 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4526 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4527 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4528 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4529 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4530 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4531 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4532 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4533 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4534 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4535 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4536 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4537 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4538 ierr = PetscFree(merge);CHKERRQ(ierr); 4539 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4540 } 4541 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4542 PetscFunctionReturn(0); 4543 } 4544 4545 #include <../src/mat/utils/freespace.h> 4546 #include <petscbt.h> 4547 4548 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4549 { 4550 PetscErrorCode ierr; 4551 MPI_Comm comm; 4552 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4553 PetscMPIInt 
size,rank,taga,*len_s; 4554 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4555 PetscInt proc,m; 4556 PetscInt **buf_ri,**buf_rj; 4557 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4558 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4559 MPI_Request *s_waits,*r_waits; 4560 MPI_Status *status; 4561 MatScalar *aa=a->a; 4562 MatScalar **abuf_r,*ba_i; 4563 Mat_Merge_SeqsToMPI *merge; 4564 PetscContainer container; 4565 4566 PetscFunctionBegin; 4567 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4568 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4569 4570 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4571 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4572 4573 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4574 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4575 4576 bi = merge->bi; 4577 bj = merge->bj; 4578 buf_ri = merge->buf_ri; 4579 buf_rj = merge->buf_rj; 4580 4581 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4582 owners = merge->rowmap->range; 4583 len_s = merge->len_s; 4584 4585 /* send and recv matrix values */ 4586 /*-----------------------------*/ 4587 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4588 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4589 4590 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4591 for (proc=0,k=0; proc<size; proc++) { 4592 if (!len_s[proc]) continue; 4593 i = owners[proc]; 4594 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4595 k++; 4596 } 4597 4598 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4599 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4600 ierr = PetscFree(status);CHKERRQ(ierr); 4601 4602 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4603 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4604 4605 /* insert mat values of mpimat */ 4606 /*----------------------------*/ 4607 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4608 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4609 4610 for (k=0; k<merge->nrecv; k++) { 4611 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4612 nrows = *(buf_ri_k[k]); 4613 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4614 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4615 } 4616 4617 /* set values of ba */ 4618 m = merge->rowmap->n; 4619 for (i=0; i<m; i++) { 4620 arow = owners[rank] + i; 4621 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4622 bnzi = bi[i+1] - bi[i]; 4623 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4624 4625 /* add local non-zero vals of this proc's seqmat into ba */ 4626 anzi = ai[arow+1] - ai[arow]; 4627 aj = a->j + ai[arow]; 4628 aa = a->a + ai[arow]; 4629 nextaj = 0; 4630 for (j=0; nextaj<anzi; j++) { 4631 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4632 ba_i[j] += aa[nextaj++]; 4633 } 4634 } 4635 4636 /* add received vals into ba */ 4637 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4638 /* i-th row */ 4639 if (i == *nextrow[k]) { 4640 anzi = *(nextai[k]+1) - *nextai[k]; 4641 aj = buf_rj[k] + *(nextai[k]); 4642 aa = abuf_r[k] + *(nextai[k]); 4643 nextaj = 0; 4644 for (j=0; nextaj<anzi; j++) { 4645 if (*(bj_i + j) == aj[nextaj]) { /* bcol 
== acol */ 4646 ba_i[j] += aa[nextaj++]; 4647 } 4648 } 4649 nextrow[k]++; nextai[k]++; 4650 } 4651 } 4652 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4653 } 4654 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4655 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4656 4657 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4658 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4659 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4660 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4661 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4662 PetscFunctionReturn(0); 4663 } 4664 4665 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4666 { 4667 PetscErrorCode ierr; 4668 Mat B_mpi; 4669 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4670 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4671 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4672 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4673 PetscInt len,proc,*dnz,*onz,bs,cbs; 4674 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4675 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4676 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4677 MPI_Status *status; 4678 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4679 PetscBT lnkbt; 4680 Mat_Merge_SeqsToMPI *merge; 4681 PetscContainer container; 4682 4683 PetscFunctionBegin; 4684 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4685 4686 /* make sure it is a PETSc comm */ 4687 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4688 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4689 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4690 4691 ierr = PetscNew(&merge);CHKERRQ(ierr); 4692 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4693 4694 /* determine row ownership */ 4695 /*---------------------------------------------------------*/ 4696 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4697 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4698 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4699 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4700 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4701 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4702 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4703 4704 m = merge->rowmap->n; 4705 owners = merge->rowmap->range; 4706 4707 /* determine the number of messages to send, their lengths */ 4708 /*---------------------------------------------------------*/ 4709 len_s = merge->len_s; 4710 4711 len = 0; /* length of buf_si[] */ 4712 merge->nsend = 0; 4713 for (proc=0; proc<size; proc++) { 4714 len_si[proc] = 0; 4715 if (proc == rank) { 4716 len_s[proc] = 0; 4717 } else { 4718 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4719 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4720 } 4721 if (len_s[proc]) { 4722 merge->nsend++; 4723 nrows = 0; 4724 for (i=owners[proc]; i<owners[proc+1]; i++) { 4725 if (ai[i+1] > ai[i]) nrows++; 4726 } 4727 len_si[proc] = 2*(nrows+1); 4728 len += len_si[proc]; 4729 } 4730 } 4731 4732 /* determine the number and length of messages to receive for ij-structure */ 4733 /*-------------------------------------------------------------------------*/ 4734 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4735 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4736 4737 /* post the Irecv of j-structure */ 4738 /*-------------------------------*/ 4739 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4740 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4741 4742 /* post the Isend of j-structure */ 4743 /*--------------------------------*/ 4744 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4745 4746 for (proc=0, k=0; proc<size; proc++) { 4747 if (!len_s[proc]) continue; 4748 i = owners[proc]; 4749 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4750 k++; 4751 } 4752 4753 /* receives and sends of j-structure are complete */ 4754 /*------------------------------------------------*/ 4755 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4756 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4757 4758 /* send and recv i-structure */ 4759 /*---------------------------*/ 4760 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4761 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4762 4763 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4764 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4765 for (proc=0,k=0; proc<size; proc++) { 4766 if (!len_s[proc]) continue; 4767 /* form outgoing message for i-structure: 4768 buf_si[0]: nrows to be sent 4769 [1:nrows]: row index (global) 4770 [nrows+1:2*nrows+1]: i-structure index 4771 */ 4772 /*-------------------------------------------*/ 4773 nrows = len_si[proc]/2 - 1; 4774 buf_si_i = buf_si + nrows+1; 4775 buf_si[0] = nrows; 4776 buf_si_i[0] = 0; 4777 nrows = 0; 4778 for (i=owners[proc]; i<owners[proc+1]; i++) { 4779 anzi = ai[i+1] - ai[i]; 4780 if (anzi) { 4781 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4782 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4783 nrows++; 4784 } 4785 } 4786 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4787 k++; 4788 buf_si += len_si[proc]; 4789 } 4790 4791 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4792 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4793 4794 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4795 for (i=0; i<merge->nrecv; i++) { 4796 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4797 } 4798 4799 ierr = PetscFree(len_si);CHKERRQ(ierr); 4800 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4801 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4802 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4803 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4804 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4805 ierr = PetscFree(status);CHKERRQ(ierr); 4806 4807 /* compute a local seq matrix in each processor */ 4808 /*----------------------------------------------*/ 4809 /* allocate bi array and free space for accumulating nonzero column info */ 4810 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4811 bi[0] = 0; 4812 4813 /* create and initialize a linked list */ 4814 nlnk = N+1; 4815 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4816 4817 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4818 len = ai[owners[rank+1]] - 
ai[owners[rank]];
  ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the
supporting struct to B_mpi for reuse */ 4906 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4907 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4908 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4909 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4910 *mpimat = B_mpi; 4911 4912 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4913 PetscFunctionReturn(0); 4914 } 4915 4916 /*@C 4917 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4918 matrices from each processor 4919 4920 Collective on MPI_Comm 4921 4922 Input Parameters: 4923 + comm - the communicators the parallel matrix will live on 4924 . seqmat - the input sequential matrices 4925 . m - number of local rows (or PETSC_DECIDE) 4926 . n - number of local columns (or PETSC_DECIDE) 4927 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4928 4929 Output Parameter: 4930 . mpimat - the parallel matrix generated 4931 4932 Level: advanced 4933 4934 Notes: 4935 The dimensions of the sequential matrix in each processor MUST be the same. 4936 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4937 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4938 @*/ 4939 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4940 { 4941 PetscErrorCode ierr; 4942 PetscMPIInt size; 4943 4944 PetscFunctionBegin; 4945 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4946 if (size == 1) { 4947 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4948 if (scall == MAT_INITIAL_MATRIX) { 4949 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4950 } else { 4951 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4952 } 4953 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4954 PetscFunctionReturn(0); 4955 } 4956 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4957 if (scall == MAT_INITIAL_MATRIX) { 4958 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4959 } 4960 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4961 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4962 PetscFunctionReturn(0); 4963 } 4964 4965 /*@ 4966 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4967 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4968 with MatGetSize() 4969 4970 Not Collective 4971 4972 Input Parameters: 4973 + A - the matrix 4974 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4975 4976 Output Parameter: 4977 . 
A_loc - the local sequential matrix generated 4978 4979 Level: developer 4980 4981 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4982 4983 @*/ 4984 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4985 { 4986 PetscErrorCode ierr; 4987 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4988 Mat_SeqAIJ *mat,*a,*b; 4989 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4990 MatScalar *aa,*ba,*cam; 4991 PetscScalar *ca; 4992 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4993 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4994 PetscBool match; 4995 MPI_Comm comm; 4996 PetscMPIInt size; 4997 4998 PetscFunctionBegin; 4999 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5000 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5001 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5002 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5003 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5004 5005 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5006 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5007 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5008 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5009 aa = a->a; ba = b->a; 5010 if (scall == MAT_INITIAL_MATRIX) { 5011 if (size == 1) { 5012 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5013 PetscFunctionReturn(0); 5014 } 5015 5016 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5017 ci[0] = 0; 5018 for (i=0; i<am; i++) { 5019 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5020 } 5021 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5022 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5023 k = 0; 5024 for (i=0; i<am; i++) { 5025 ncols_o = bi[i+1] - bi[i]; 5026 ncols_d = ai[i+1] - ai[i]; 5027 /* off-diagonal portion of A */ 5028 for (jo=0; jo<ncols_o; jo++) { 5029 col = cmap[*bj]; 5030 if (col >= cstart) break; 5031 cj[k] = col; bj++; 5032 ca[k++] = *ba++; 5033 } 5034 /* diagonal portion of A */ 5035 for (j=0; j<ncols_d; j++) { 5036 cj[k] = cstart + *aj++; 5037 ca[k++] = *aa++; 5038 } 5039 /* off-diagonal portion of A */ 5040 for (j=jo; j<ncols_o; j++) { 5041 cj[k] = cmap[*bj++]; 5042 ca[k++] = *ba++; 5043 } 5044 } 5045 /* put together the new matrix */ 5046 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5047 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5048 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5049 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5050 mat->free_a = PETSC_TRUE; 5051 mat->free_ij = PETSC_TRUE; 5052 mat->nonew = 0; 5053 } else if (scall == MAT_REUSE_MATRIX) { 5054 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5055 ci = mat->i; cj = mat->j; cam = mat->a; 5056 for (i=0; i<am; i++) { 5057 /* off-diagonal portion of A */ 5058 ncols_o = bi[i+1] - bi[i]; 5059 for (jo=0; jo<ncols_o; jo++) { 5060 col = cmap[*bj]; 5061 if (col >= cstart) break; 5062 *cam++ = *ba++; bj++; 5063 } 5064 /* diagonal portion of A */ 5065 ncols_d = ai[i+1] - ai[i]; 5066 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5067 /* off-diagonal portion of A */ 5068 for (j=jo; j<ncols_o; j++) { 5069 *cam++ = *ba++; bj++; 5070 } 5071 } 5072 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5073 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5074 PetscFunctionReturn(0); 5075 } 5076 5077 /*@C 5078 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5079 5080 Not Collective 5081 5082 Input Parameters: 5083 + A - the matrix 5084 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5085 - row, col - index sets of rows and columns to extract (or NULL) 5086 5087 Output Parameter: 5088 . A_loc - the local sequential matrix generated 5089 5090 Level: developer 5091 5092 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5093 5094 @*/ 5095 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5096 { 5097 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5098 PetscErrorCode ierr; 5099 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5100 IS isrowa,iscola; 5101 Mat *aloc; 5102 PetscBool match; 5103 5104 PetscFunctionBegin; 5105 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5106 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5107 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5108 if (!row) { 5109 start = A->rmap->rstart; end = A->rmap->rend; 5110 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5111 } else { 5112 isrowa = *row; 5113 } 5114 if (!col) { 5115 start = A->cmap->rstart; 5116 cmap = a->garray; 5117 nzA = a->A->cmap->n; 5118 nzB = a->B->cmap->n; 5119 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5120 ncols = 0; 5121 for (i=0; i<nzB; i++) { 5122 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5123 else break; 5124 } 5125 imark = i; 5126 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5127 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5128 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5129 } else { 5130 iscola = *col; 5131 } 5132 if (scall != MAT_INITIAL_MATRIX) { 5133 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5134 aloc[0] = *A_loc; 5135 } 5136 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5137 if (!col) { /* attach global id of condensed columns */ 5138 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5139 } 5140 *A_loc = aloc[0]; 5141 ierr = PetscFree(aloc);CHKERRQ(ierr); 5142 if (!row) { 5143 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5144 } 5145 if (!col) { 5146 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5147 } 5148 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5149 PetscFunctionReturn(0); 5150 } 5151 5152 /*@C 5153 MatGetBrowsOfAcols - Creates a 
SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5154 5155 Collective on Mat 5156 5157 Input Parameters: 5158 + A,B - the matrices in mpiaij format 5159 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5160 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5161 5162 Output Parameter: 5163 + rowb, colb - index sets of rows and columns of B to extract 5164 - B_seq - the sequential matrix generated 5165 5166 Level: developer 5167 5168 @*/ 5169 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5170 { 5171 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5172 PetscErrorCode ierr; 5173 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5174 IS isrowb,iscolb; 5175 Mat *bseq=NULL; 5176 5177 PetscFunctionBegin; 5178 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5179 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5180 } 5181 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5182 5183 if (scall == MAT_INITIAL_MATRIX) { 5184 start = A->cmap->rstart; 5185 cmap = a->garray; 5186 nzA = a->A->cmap->n; 5187 nzB = a->B->cmap->n; 5188 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5189 ncols = 0; 5190 for (i=0; i<nzB; i++) { /* row < local row index */ 5191 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5192 else break; 5193 } 5194 imark = i; 5195 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5196 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5197 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5198 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5199 } else { 5200 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5201 isrowb = *rowb; iscolb = *colb; 5202 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5203 bseq[0] = *B_seq; 5204 } 5205 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5206 *B_seq = bseq[0]; 5207 ierr = PetscFree(bseq);CHKERRQ(ierr); 5208 if (!rowb) { 5209 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5210 } else { 5211 *rowb = isrowb; 5212 } 5213 if (!colb) { 5214 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5215 } else { 5216 *colb = iscolb; 5217 } 5218 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5219 PetscFunctionReturn(0); 5220 } 5221 5222 /* 5223 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5224 of the OFF-DIAGONAL portion of local A 5225 5226 Collective on Mat 5227 5228 Input Parameters: 5229 + A,B - the matrices in mpiaij format 5230 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5231 5232 Output Parameter: 5233 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5234 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5235 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5236 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5237 5238 Level: developer 5239 5240 */ 5241 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5242 { 5243 VecScatter_MPI_General *gen_to,*gen_from; 5244 PetscErrorCode ierr; 5245 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5246 Mat_SeqAIJ *b_oth; 5247 VecScatter ctx; 5248 MPI_Comm comm; 5249 PetscMPIInt *rprocs,*sprocs,tag,rank; 5250 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5251 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5252 PetscScalar *b_otha,*bufa,*bufA,*vals; 5253 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5254 MPI_Request *rwaits = NULL,*swaits = NULL; 5255 MPI_Status *sstatus,rstatus; 5256 PetscMPIInt jj,size; 5257 VecScatterType type; 5258 PetscBool mpi1; 5259 5260 PetscFunctionBegin; 5261 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5262 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5263 5264 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5265 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5266 } 5267 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5268 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5269 5270 if (size == 1) { 5271 startsj_s = NULL; 5272 bufa_ptr = NULL; 5273 *B_oth = NULL; 5274 PetscFunctionReturn(0); 5275 } 5276 5277 ctx = a->Mvctx; 5278 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5279 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5280 if (!mpi1) { 5281 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5282 thus create a->Mvctx_mpi1 */ 5283 if (!a->Mvctx_mpi1) { 5284 a->Mvctx_mpi1_flg = PETSC_TRUE; 5285 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5286 } 5287 ctx = a->Mvctx_mpi1; 5288 } 5289 tag = ((PetscObject)ctx)->tag; 5290 5291 gen_to = (VecScatter_MPI_General*)ctx->todata; 5292 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5293 nrecvs = gen_from->n; 5294 nsends = gen_to->n; 5295 5296 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5297 srow = gen_to->indices; /* local row index to be sent */ 5298 sstarts = gen_to->starts; 5299 sprocs = gen_to->procs; 5300 sstatus = gen_to->sstatus; 5301 sbs = gen_to->bs; 5302 rstarts = gen_from->starts; 5303 rprocs = gen_from->procs; 5304 rbs = gen_from->bs; 5305 5306 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5307 if (scall == MAT_INITIAL_MATRIX) { 5308 /* i-array */ 5309 /*---------*/ 5310 /* post receives */ 5311 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5312 for (i=0; i<nrecvs; i++) { 5313 rowlen = rvalues + rstarts[i]*rbs; 5314 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5315 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5316 } 5317 5318 /* pack the outgoing message */ 5319 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5320 5321 sstartsj[0] = 0; 5322 rstartsj[0] = 0; 5323 len = 0; /* total length of j or a array to be sent */ 5324 k = 0; 5325 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5326 for (i=0; i<nsends; i++) { 5327 rowlen = svalues + sstarts[i]*sbs; 5328 
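      /* rowlen points into svalues where the row lengths for the i-th outgoing message are packed */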
nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5329 for (j=0; j<nrows; j++) { 5330 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5331 for (l=0; l<sbs; l++) { 5332 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5333 5334 rowlen[j*sbs+l] = ncols; 5335 5336 len += ncols; 5337 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5338 } 5339 k++; 5340 } 5341 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5342 5343 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5344 } 5345 /* recvs and sends of i-array are completed */ 5346 i = nrecvs; 5347 while (i--) { 5348 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5349 } 5350 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5351 ierr = PetscFree(svalues);CHKERRQ(ierr); 5352 5353 /* allocate buffers for sending j and a arrays */ 5354 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5355 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5356 5357 /* create i-array of B_oth */ 5358 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5359 5360 b_othi[0] = 0; 5361 len = 0; /* total length of j or a array to be received */ 5362 k = 0; 5363 for (i=0; i<nrecvs; i++) { 5364 rowlen = rvalues + rstarts[i]*rbs; 5365 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5366 for (j=0; j<nrows; j++) { 5367 b_othi[k+1] = b_othi[k] + rowlen[j]; 5368 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5369 k++; 5370 } 5371 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5372 } 5373 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5374 5375 /* allocate space for j and a arrrays of B_oth */ 5376 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5377 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5378 5379 /* j-array */ 5380 /*---------*/ 5381 /* post receives of j-array */ 5382 for (i=0; i<nrecvs; i++) { 5383 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5384 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5385 } 5386 5387 /* pack the outgoing message j-array */ 5388 k = 0; 5389 for (i=0; i<nsends; i++) { 5390 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5391 bufJ = bufj+sstartsj[i]; 5392 for (j=0; j<nrows; j++) { 5393 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5394 for (ll=0; ll<sbs; ll++) { 5395 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5396 for (l=0; l<ncols; l++) { 5397 *bufJ++ = cols[l]; 5398 } 5399 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5400 } 5401 } 5402 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5403 } 5404 5405 /* recvs and sends of j-array are completed */ 5406 i = nrecvs; 5407 while (i--) { 5408 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5409 } 5410 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5411 } else if (scall == MAT_REUSE_MATRIX) { 5412 sstartsj = *startsj_s; 5413 rstartsj = *startsj_r; 5414 bufa = *bufa_ptr; 5415 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5416 b_otha = b_oth->a; 5417 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5418 5419 /* a-array */ 5420 /*---------*/ 5421 /* post receives of a-array */ 5422 for (i=0; i<nrecvs; i++) { 5423 nrows = rstartsj[i+1]-rstartsj[i]; /* length 
of the msg received */ 5424 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5425 } 5426 5427 /* pack the outgoing message a-array */ 5428 k = 0; 5429 for (i=0; i<nsends; i++) { 5430 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5431 bufA = bufa+sstartsj[i]; 5432 for (j=0; j<nrows; j++) { 5433 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5434 for (ll=0; ll<sbs; ll++) { 5435 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5436 for (l=0; l<ncols; l++) { 5437 *bufA++ = vals[l]; 5438 } 5439 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5440 } 5441 } 5442 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5443 } 5444 /* recvs and sends of a-array are completed */ 5445 i = nrecvs; 5446 while (i--) { 5447 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5448 } 5449 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5450 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5451 5452 if (scall == MAT_INITIAL_MATRIX) { 5453 /* put together the new matrix */ 5454 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5455 5456 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5457 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5458 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5459 b_oth->free_a = PETSC_TRUE; 5460 b_oth->free_ij = PETSC_TRUE; 5461 b_oth->nonew = 0; 5462 5463 ierr = PetscFree(bufj);CHKERRQ(ierr); 5464 if (!startsj_s || !bufa_ptr) { 5465 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5466 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5467 } else { 5468 *startsj_s = sstartsj; 5469 *startsj_r = rstartsj; 5470 *bufa_ptr = bufa; 5471 } 5472 } 5473 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5474 PetscFunctionReturn(0); 5475 } 5476 5477 /*@C 5478 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5479 5480 Not Collective 5481 5482 Input Parameters: 5483 . A - The matrix in mpiaij format 5484 5485 Output Parameter: 5486 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5487 . 
colmap - A map from global column index to local index into lvec 5488 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5489 5490 Level: developer 5491 5492 @*/ 5493 #if defined(PETSC_USE_CTABLE) 5494 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5495 #else 5496 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5497 #endif 5498 { 5499 Mat_MPIAIJ *a; 5500 5501 PetscFunctionBegin; 5502 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5503 PetscValidPointer(lvec, 2); 5504 PetscValidPointer(colmap, 3); 5505 PetscValidPointer(multScatter, 4); 5506 a = (Mat_MPIAIJ*) A->data; 5507 if (lvec) *lvec = a->lvec; 5508 if (colmap) *colmap = a->colmap; 5509 if (multScatter) *multScatter = a->Mvctx; 5510 PetscFunctionReturn(0); 5511 } 5512 5513 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5514 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5515 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5516 #if defined(PETSC_HAVE_MKL_SPARSE) 5517 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5518 #endif 5519 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5520 #if defined(PETSC_HAVE_ELEMENTAL) 5521 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5522 #endif 5523 #if defined(PETSC_HAVE_HYPRE) 5524 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5525 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5526 #endif 5527 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5528 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5529 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5530 5531 /* 5532 Computes (B'*A')' since computing B*A directly is untenable 5533 5534 n p p 5535 ( ) ( ) ( ) 5536 m ( A ) * n ( B ) = m ( C ) 5537 ( ) ( ) ( ) 5538 5539 */ 5540 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5541 { 5542 PetscErrorCode ierr; 5543 Mat At,Bt,Ct; 5544 5545 PetscFunctionBegin; 5546 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5547 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5548 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5549 ierr = MatDestroy(&At);CHKERRQ(ierr); 5550 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5551 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5552 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5553 PetscFunctionReturn(0); 5554 } 5555 5556 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5557 { 5558 PetscErrorCode ierr; 5559 PetscInt m=A->rmap->n,n=B->cmap->n; 5560 Mat Cmat; 5561 5562 PetscFunctionBegin; 5563 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5564 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5565 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5566 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5567 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5568 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5569 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5570 ierr = 
MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5571 5572 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5573 5574 *C = Cmat; 5575 PetscFunctionReturn(0); 5576 } 5577 5578 /* ----------------------------------------------------------------*/ 5579 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5580 { 5581 PetscErrorCode ierr; 5582 5583 PetscFunctionBegin; 5584 if (scall == MAT_INITIAL_MATRIX) { 5585 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5586 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5587 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5588 } 5589 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5590 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5591 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5592 PetscFunctionReturn(0); 5593 } 5594 5595 /*MC 5596 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5597 5598 Options Database Keys: 5599 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5600 5601 Level: beginner 5602 5603 .seealso: MatCreateAIJ() 5604 M*/ 5605 5606 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5607 { 5608 Mat_MPIAIJ *b; 5609 PetscErrorCode ierr; 5610 PetscMPIInt size; 5611 5612 PetscFunctionBegin; 5613 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5614 5615 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5616 B->data = (void*)b; 5617 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5618 B->assembled = PETSC_FALSE; 5619 B->insertmode = NOT_SET_VALUES; 5620 b->size = size; 5621 5622 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5623 5624 /* build cache for off array entries formed */ 5625 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5626 5627 b->donotstash = PETSC_FALSE; 5628 b->colmap = 0; 5629 b->garray = 0; 5630 b->roworiented = PETSC_TRUE; 5631 5632 /* stuff used for matrix vector multiply */ 5633 b->lvec = NULL; 5634 b->Mvctx = NULL; 5635 5636 /* stuff for MatGetRow() */ 5637 b->rowindices = 0; 5638 b->rowvalues = 0; 5639 b->getrowactive = PETSC_FALSE; 5640 5641 /* flexible pointer used in CUSP/CUSPARSE classes */ 5642 b->spptr = NULL; 5643 5644 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5645 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5646 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5647 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5648 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5649 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5650 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5651 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5652 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5653 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 5654 #if defined(PETSC_HAVE_MKL_SPARSE) 5655 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5656 #endif 5657 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5658 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5659 #if defined(PETSC_HAVE_ELEMENTAL) 5660 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5661 #endif 5662 #if defined(PETSC_HAVE_HYPRE) 5663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5664 #endif 5665 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5668 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5670 #if defined(PETSC_HAVE_HYPRE) 5671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5672 #endif 5673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5674 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5675 PetscFunctionReturn(0); 5676 } 5677 5678 /*@C 5679 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5680 and "off-diagonal" part of the matrix in CSR format. 5681 5682 Collective on MPI_Comm 5683 5684 Input Parameters: 5685 + comm - MPI communicator 5686 . m - number of local rows (Cannot be PETSC_DECIDE) 5687 . n - This value should be the same as the local size used in creating the 5688 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5689 calculated if N is given) For square matrices n is almost always m. 5690 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5691 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5692 . i - row indices for "diagonal" portion of matrix 5693 . j - column indices 5694 . a - matrix values 5695 . oi - row indices for "off-diagonal" portion of matrix 5696 . oj - column indices 5697 - oa - matrix values 5698 5699 Output Parameter: 5700 . mat - the matrix 5701 5702 Level: advanced 5703 5704 Notes: 5705 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5706 must free the arrays once the matrix has been destroyed and not before. 
5707 5708 The i and j indices are 0 based 5709 5710 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5711 5712 This sets local rows and cannot be used to set off-processor values. 5713 5714 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5715 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5716 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5717 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5718 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5719 communication if it is known that only local entries will be set. 5720 5721 .keywords: matrix, aij, compressed row, sparse, parallel 5722 5723 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5724 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5725 @*/ 5726 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5727 { 5728 PetscErrorCode ierr; 5729 Mat_MPIAIJ *maij; 5730 5731 PetscFunctionBegin; 5732 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5733 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5734 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5735 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5736 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5737 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5738 maij = (Mat_MPIAIJ*) (*mat)->data; 5739 5740 (*mat)->preallocated = PETSC_TRUE; 5741 5742 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5743 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5744 5745 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5746 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5747 5748 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5749 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5750 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5751 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5752 5753 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5754 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5755 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5756 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5757 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5758 PetscFunctionReturn(0); 5759 } 5760 5761 /* 5762 Special version for direct calls from Fortran 5763 */ 5764 #include <petsc/private/fortranimpl.h> 5765 5766 /* Change these macros so can be used in void function */ 5767 #undef CHKERRQ 5768 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5769 #undef SETERRQ2 5770 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5771 #undef SETERRQ3 5772 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5773 #undef SETERRQ 5774 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5775 
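/* map the C symbol to the name produced by the Fortran compiler's name-mangling convention */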
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 &&
                  !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a; /* refresh ba before rebuilding ap2; the old value array was released by MatDisAssemble_MPIAIJ() */
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
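/*
    Summary (illustrative only): per (row, column) entry, matsetvaluesmpiaij_() above performs the
    same dispatch as the C MatSetValues path for MATMPIAIJ. Rows not owned by this process are
    stashed for the assembly communication; owned columns go into the sequential "diagonal" block
    aij->A with a shifted local index; all other columns go into the "off-diagonal" block aij->B
    (through aij->colmap once the matrix has been assembled). In simplified form, omitting the
    colmap lookup, the zero-entry filtering, and the debug error checks:

      if (im[i] < rstart || im[i] >= rend) {
        if (!aij->donotstash) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        }
      } else if (in[j] >= cstart && in[j] < cend) {
        MatSetValues_SeqAIJ_A_Private(im[i]-rstart,in[j]-cstart,value,addv,im[i],in[j]);
      } else {
        MatSetValues_SeqAIJ_B_Private(im[i]-rstart,in[j],value,addv,im[i],in[j]);
      }
*/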