#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type
    also automatically switches over to use inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
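
/*
   Illustrative usage sketch for the MATAIJ man page above; this is not code used by this file,
   and comm, N, and the preallocation counts (5 diagonal, 2 off-diagonal nonzeros per row) are
   placeholder assumptions. Calling both preallocation routines, as recommended, keeps the code
   correct for any communicator size: the routine that does not match the actual type is ignored.
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,5,NULL);
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
.ve
*/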

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
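
/*
   Illustrative sketch of driving the column-norm routine above through the public
   MatGetColumnNorms() interface; not code used by this file. Every process must supply a norms
   array sized to the number of *global* columns, since the result is reduced across the whole
   communicator.
.vb
   PetscReal *norms;
   PetscInt  N;
   MatGetSize(A,NULL,&N);
   PetscMalloc1(N,&norms);
   MatGetColumnNorms(A,NORM_2,norms);
   PetscFree(norms);
.ve
*/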

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices.

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore = 0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processes */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable at
  a slightly higher hash-table cost; without it, it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/* binary-search the current row of the diagonal block A for col; update the value in place,
   or insert it, growing the row storage if needed */
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

/* same as above, but for the off-diagonal block B */
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A     = aij->A;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa   = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B     = aij->B;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba   = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
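
/*
   Illustrative sketch of the stash path handled above; not code used by this file, and the
   indices and value are placeholders. MatSetValues() accepts rows owned by other processes;
   such entries are stashed here and only communicated and inserted during assembly.
.vb
   PetscInt    row = 0,col = 0;
   PetscScalar val = 1.0;
   MatSetValues(A,1,&row,1,&col,&val,ADD_VALUES);
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
*/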

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A    = aij->A; /* diagonal part of the matrix */
  Mat        B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all nonzero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal block */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}
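
/*
   Illustrative sketch of the intended two-phase use of the CSR copy routines (this one and the
   numeric variant below); not code used by this file. Both assume a local, row-sorted CSR triple
   (mat_i, mat_j, mat_a) and an unassembled matrix, as the comments above and below state.
.vb
   MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(mat,mat_j,mat_i);
   MatSetValues_MPIAIJ_CopyFromCSRFormat(mat,mat_j,mat_i,mat_a);
.ve
*/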

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
  Mat         A     = aij->A; /* diagonal part of the matrix */
  Mat         B     = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b    = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am     = aij->A->rmap->n,j;
  PetscInt    *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all nonzero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal block */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  PetscInt       *lrows;
  PetscInt       r,len;
  PetscBool      cong;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(mat->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if ((diag != 0.0) && cong) {
    ierr = MatZeroRows(mat->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
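
/*
   Illustrative sketch of the usual Dirichlet boundary-condition idiom served by the routine
   above; not code used by this file, and the row list is a placeholder. Passing the solution
   vector x and right-hand side b lets the routine fix b so the zeroed rows keep the values
   already in x.
.vb
   PetscInt rows[] = {0,1};
   MatZeroRows(A,2,rows,1.0,x,b);
.ve
*/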

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n,&lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N,&rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf,n,N,NULL,PETSC_OWN_POINTER,rrows,PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf,MPIU_INT,(PetscInt*)rows,lrows,MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,(PetscInt*)rows,lrows,MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off-process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  /* overlap communication of the ghost values with the multiply by the diagonal block */
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
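
/*
   Illustrative sketch; not code used by this file, and the tolerance is a placeholder.
   MatIsSymmetric() on an MPIAIJ matrix dispatches to MatIsSymmetric_MPIAIJ() above, which
   first runs the cheap diagonal-block test and only then compares the off-diagonal blocks.
.vb
   PetscBool symm;
   MatIsSymmetric(A,1.e-10,&symm);
.ve
*/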

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range = 0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* each process needs as much space as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*)(aij->A->data))->nz + ((Mat_SeqAIJ*)(aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax  = PetscMax(nmax,nz[i]);
        nmin  = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*)(aij->A->data))->nz + ((Mat_SeqAIJ*)(aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto the first processor */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
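/*
   Usage sketch (illustrative): the LOAD_BALANCE branch above is reached by pushing
   the matching format onto the viewer before viewing, e.g.

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_LOAD_BALANCE);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/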
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
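/*
   Usage sketch (illustrative): MatSOR() is normally exercised through the SOR
   preconditioner rather than called directly, e.g.

     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);

   or with -pc_type sor -pc_sor_local_symmetric on the command line; the local
   sweeps above are what those options trigger in parallel.
*/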
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}
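/*
   Usage sketch (illustrative): permuting rows and columns of a parallel AIJ matrix
   with index sets that, taken across all processes, describe the permutations.

     IS  rowperm,colperm;
     Mat Aperm;
     ierr = MatPermute(A,rowperm,colperm,&Aperm);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/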
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
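/*
   Usage sketch (illustrative): querying an assembled matrix for global storage
   statistics through the MatGetInfo() interface handled above.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g, allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/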
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
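/*
   Usage sketch (illustrative): iterating over the locally owned rows; each
   MatGetRow() must be paired with a MatRestoreRow() before the next call.

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/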
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr  = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
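/*
   Usage sketch (illustrative): only NORM_1, NORM_FROBENIUS, and NORM_INFINITY are
   supported in parallel; the 2-norm branch above raises PETSC_ERR_SUP.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/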
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data,*Bloc = (Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol;
  PetscErrorCode ierr;
  Mat            B,A_diag,*B_diag;
  MatScalar      *array;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }
  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
  /* copy over the B part */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
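/*
   Usage sketch (illustrative): the three reuse modes handled above.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);    first call, allocates At
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);      same nonzero pattern, refills At
     ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);     replaces A with its transpose
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/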
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
       and then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
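/*
   Usage sketch (illustrative): scaling A <- diag(l) A diag(r) with layout-compatible
   vectors; either argument may be NULL to skip that side.

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/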
/*
    Computes the number of nonzeros per row needed for preallocation when X and Y
    have different nonzero structure.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx    = xi[i+1] - xi[i];
    nzy    = yi[i+1] - yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                                     /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++;   /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;               /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       m = Y->rmap->N;
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one = 1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
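/*
   Usage sketch (illustrative): Y = a*X + Y; the MatStructure flag selects the fast
   path above. Use SAME_NONZERO_PATTERN when X and Y were assembled with identical
   patterns, SUBSET_NONZERO_PATTERN when every entry of X exists in Y.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/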
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
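/*
   Usage sketch (illustrative): v[i] receives the largest magnitude in local row i,
   idx[i] the global column where it occurs (idx may be NULL).

     Vec      rmax;
     PetscInt m,*loc;
     ierr = MatCreateVecs(A,NULL,&rmax);CHKERRQ(ierr);
     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = PetscMalloc1(m,&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rmax,loc);CHKERRQ(ierr);
*/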
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx,*offdiagIdx;
  Vec            diagV,offdiagV;
  PetscScalar    *a,*diagA,*offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A,diagV,diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B,offdiagV,offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV,&diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV,&offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v,&a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV,&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV,&offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx,offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx,*offdiagIdx;
  Vec            diagV,offdiagV;
  PetscScalar    *a,*diagA,*offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A,diagV,diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B,offdiagV,offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV,&diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV,&offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v,&a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV,&diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV,&offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx,offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determines whether the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode ierr;
  PetscBool      sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij  = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d  += rstart;
  }
  PetscFunctionReturn(0);
}
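/*
   Usage sketch (illustrative): enable the scalable overlap algorithm before an
   additive Schwarz setup, either programmatically,

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);

   or with the command-line option -mat_increase_overlap_scalable registered above.
*/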
/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                               /*  4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       0,
                                       0,
                                       0,
                               /* 10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                               /* 15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                               /* 20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                               /* 24*/ MatZeroRows_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 29*/ MatSetUp_MPIAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       0,
                               /* 34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                               /* 44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                               /* 49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                               /* 59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                               /* 64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                               /* 69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                               /* 80*/ 0,
                                       0,
                                       0,
                               /* 83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                               /* 94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                               /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                               /*109*/ 0,
                                       0,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       MatMissingDiagonal_MPIAIJ,
                               /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       MatGetGhosts_MPIAIJ,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                               /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                               /*129*/ 0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                               /*134*/ 0,
                                       0,
                                       MatRARt_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                               /*139*/ MatSetBlockSizes_MPIAIJ,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                               /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
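/*
   Usage sketch (illustrative): MatStoreValues()/MatRetrieveValues() cache and
   restore the numerical values when the same nonzero pattern is refilled
   repeatedly; MAT_NEW_NONZERO_LOCATIONS must be disabled first.

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);
*/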
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
  ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
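/*
   Usage sketch (illustrative): per-row preallocation of the diagonal and
   off-diagonal blocks handled above; d_nnz/o_nnz may be NULL, in which case the
   scalar d_nz/o_nz is used for every row.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,3,NULL,2,NULL);CHKERRQ(ierr);
*/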
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  if (oldmat->Mvctx_mpi1) {
    ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
  }

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
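/*
   Usage sketch (illustrative): duplicating either the full matrix or just its
   nonzero pattern.

     Mat Adup;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&Adup);CHKERRQ(ierr);
     ierr = MatDestroy(&Adup);CHKERRQ(ierr);

   Pass MAT_DO_NOT_COPY_VALUES to copy only the structure.
*/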
matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2921 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2922 if (bs < 0) bs = 1; 2923 2924 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2925 M = header[1]; N = header[2]; 2926 2927 /* If global sizes are set, check if they are consistent with that given in the file */ 2928 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2929 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2930 2931 /* determine ownership of all (block) rows */ 2932 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2933 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2934 else m = newMat->rmap->n; /* Set by user */ 2935 2936 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2937 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2938 2939 /* First process needs enough room for process with most rows */ 2940 if (!rank) { 2941 mmax = rowners[1]; 2942 for (i=2; i<=size; i++) { 2943 mmax = PetscMax(mmax, rowners[i]); 2944 } 2945 } else mmax = -1; /* unused, but compilers complain */ 2946 2947 rowners[0] = 0; 2948 for (i=2; i<=size; i++) { 2949 rowners[i] += rowners[i-1]; 2950 } 2951 rstart = rowners[rank]; 2952 rend = rowners[rank+1]; 2953 2954 /* distribute row lengths to all processors */ 2955 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2956 if (!rank) { 2957 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2958 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2959 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2960 for (j=0; j<m; j++) { 2961 procsnz[0] += ourlens[j]; 2962 } 2963 for (i=1; i<size; i++) { 2964 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2965 /* calculate the number of nonzeros on each processor */ 2966 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2967 procsnz[i] += rowlengths[j]; 2968 } 2969 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2970 } 2971 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2972 } else { 2973 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2974 } 2975 2976 if (!rank) { 2977 /* determine max buffer needed and allocate it */ 2978 maxnz = 0; 2979 for (i=0; i<size; i++) { 2980 maxnz = PetscMax(maxnz,procsnz[i]); 2981 } 2982 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2983 2984 /* read in my part of the matrix column indices */ 2985 nz = procsnz[0]; 2986 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2987 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2988 2989 /* read in every one elses and ship off */ 2990 for (i=1; i<size; i++) { 2991 nz = procsnz[i]; 2992 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2993 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2994 } 2995 ierr = PetscFree(cols);CHKERRQ(ierr); 2996 } else { 2997 /* determine buffer space needed for message */ 2998 nz = 0; 2999 for (i=0; i<m; i++) { 3000 nz += ourlens[i]; 3001 } 3002 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3003 3004 /* receive message of column indices*/ 3005 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3006 } 3007 3008 /* 
determine column ownership if matrix is not square */ 3009 if (N != M) { 3010 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3011 else n = newMat->cmap->n; 3012 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3013 cstart = cend - n; 3014 } else { 3015 cstart = rstart; 3016 cend = rend; 3017 n = cend - cstart; 3018 } 3019 3020 /* loop over local rows, determining number of off diagonal entries */ 3021 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3022 jj = 0; 3023 for (i=0; i<m; i++) { 3024 for (j=0; j<ourlens[i]; j++) { 3025 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3026 jj++; 3027 } 3028 } 3029 3030 for (i=0; i<m; i++) { 3031 ourlens[i] -= offlens[i]; 3032 } 3033 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3034 3035 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3036 3037 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3038 3039 for (i=0; i<m; i++) { 3040 ourlens[i] += offlens[i]; 3041 } 3042 3043 if (!rank) { 3044 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3045 3046 /* read in my part of the matrix numerical values */ 3047 nz = procsnz[0]; 3048 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3049 3050 /* insert into matrix */ 3051 jj = rstart; 3052 smycols = mycols; 3053 svals = vals; 3054 for (i=0; i<m; i++) { 3055 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3056 smycols += ourlens[i]; 3057 svals += ourlens[i]; 3058 jj++; 3059 } 3060 3061 /* read in other processors and ship out */ 3062 for (i=1; i<size; i++) { 3063 nz = procsnz[i]; 3064 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3065 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3066 } 3067 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3068 } else { 3069 /* receive numeric values */ 3070 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3071 3072 /* receive message of values*/ 3073 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3074 3075 /* insert into matrix */ 3076 jj = rstart; 3077 smycols = mycols; 3078 svals = vals; 3079 for (i=0; i<m; i++) { 3080 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3081 smycols += ourlens[i]; 3082 svals += ourlens[i]; 3083 jj++; 3084 } 3085 } 3086 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3087 ierr = PetscFree(vals);CHKERRQ(ierr); 3088 ierr = PetscFree(mycols);CHKERRQ(ierr); 3089 ierr = PetscFree(rowners);CHKERRQ(ierr); 3090 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3091 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3092 PetscFunctionReturn(0); 3093 } 3094 3095 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3096 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3097 { 3098 PetscErrorCode ierr; 3099 IS iscol_local; 3100 PetscBool isstride; 3101 PetscMPIInt lisstride=0,gisstride; 3102 3103 PetscFunctionBegin; 3104 /* check if we are grabbing all columns*/ 3105 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3106 3107 if (isstride) { 3108 PetscInt start,len,mstart,mlen; 3109 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3110 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3111 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3112 if (mstart == start && mlen-mstart == len) lisstride = 1; 3113 } 3114 3115 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3116 if (gisstride) { 3117 PetscInt N; 3118 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3119 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3120 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3121 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3122 } else { 3123 PetscInt cbs; 3124 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3125 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3126 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3127 } 3128 3129 *isseq = iscol_local; 3130 PetscFunctionReturn(0); 3131 } 3132 3133 /* 3134 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3135 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3136 3137 Input Parameters: 3138 mat - matrix 3139 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3140 i.e., mat->rstart <= isrow[i] < mat->rend 3141 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3142 i.e., mat->cstart <= iscol[i] < mat->cend 3143 Output Parameter: 3144 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3145 iscol_o - sequential column index set for retrieving mat->B 3146 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3147 */ 3148 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3149 { 3150 PetscErrorCode ierr; 3151 Vec x,cmap; 3152 const PetscInt *is_idx; 3153 PetscScalar *xarray,*cmaparray; 3154 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3155 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3156 Mat B=a->B; 3157 Vec lvec=a->lvec,lcmap; 3158 PetscInt i,cstart,cend,Bn=B->cmap->N; 3159 MPI_Comm comm; 3160 VecScatter Mvctx=a->Mvctx; 3161 3162 PetscFunctionBegin; 3163 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3164 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3165 3166 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3167 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3168 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3169 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3170 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3171 3172 /* Get start indices */ 3173 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3174 isstart -= ncols; 3175 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3176 3177 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3178 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3179 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3180 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3181 for (i=0; i<ncols; i++) { 3182 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3183 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3184 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3185 } 3186 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3187 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3188 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3189 3190 /* Get iscol_d */ 3191 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3192 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3193 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3194 3195 /* Get isrow_d */ 3196 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3197 rstart = mat->rmap->rstart; 3198 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3199 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3200 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3201 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3202 3203 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3204 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3205 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3206 3207 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3208 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3209 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3210 3211 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3212 3213 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3214 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3215 3216 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3217 /* off-process column indices */ 3218 count = 0; 3219 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3220 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3221 3222 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3223 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3224 for (i=0; i<Bn; i++) { 3225 if (PetscRealPart(xarray[i]) > -1.0) { 3226 idx[count] = i; /* local column index in off-diagonal part B */ 3227 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3228 count++; 3229 } 3230 } 3231 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3232 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3233 3234 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3235 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3236 3237 ierr = PetscFree(idx);CHKERRQ(ierr); 3238 *garray = cmap1; 3239 3240 ierr = VecDestroy(&x);CHKERRQ(ierr); 3241 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3242 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3243 PetscFunctionReturn(0); 3244 } 3245 3246 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3247 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3248 { 3249 PetscErrorCode ierr; 3250 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3251 Mat M = NULL; 3252 MPI_Comm comm; 3253 IS iscol_d,isrow_d,iscol_o; 3254 Mat Asub = NULL,Bsub = NULL; 3255 PetscInt n; 3256 3257 PetscFunctionBegin; 3258 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3259 3260 if (call == MAT_REUSE_MATRIX) { 3261 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3262 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3263 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3264 3265 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3266 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3267 3268 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3269 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3270 3271 /* Update diagonal and off-diagonal portions of submat */ 3272 asub = (Mat_MPIAIJ*)(*submat)->data; 3273 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3274 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3275 if (n) { 3276 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3277 } 3278 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3279 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3280 3281 } else { /* call == MAT_INITIAL_MATRIX) */ 3282 const PetscInt *garray; 3283 PetscInt BsubN; 3284 3285 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/
    ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);

    /* Create local submatrices Asub and Bsub */
    ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);

    /* Create submatrix M */
    ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
    n    = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);

      ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
      j    = 0;
      ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
      }
      ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);

      ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);

    } else if (BsubN < n) {
      SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than columns of B (%D)",BsubN,asub->B->cmap->N);
    }

    ierr = PetscFree(garray);CHKERRQ(ierr);
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
    ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
    ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
        ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
        ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
        if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);

        ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
          PetscFunctionReturn(0);
        }
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
        if (iscol_sub) {
          ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
    }
  }

  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);

  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.
A - "diagonal" portion of matrix 3456 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3457 - garray - global index of B columns 3458 3459 Output Parameter: 3460 . mat - the matrix, with input A as its local diagonal matrix 3461 Level: advanced 3462 3463 Notes: 3464 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3465 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3466 3467 .seealso: MatCreateMPIAIJWithSplitArrays() 3468 @*/ 3469 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3470 { 3471 PetscErrorCode ierr; 3472 Mat_MPIAIJ *maij; 3473 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3474 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3475 PetscScalar *oa=b->a; 3476 Mat Bnew; 3477 PetscInt m,n,N; 3478 3479 PetscFunctionBegin; 3480 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3481 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3482 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3483 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3484 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3485 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3486 3487 /* Get global columns of mat */ 3488 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3489 3490 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3491 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3492 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3493 maij = (Mat_MPIAIJ*)(*mat)->data; 3494 3495 (*mat)->preallocated = PETSC_TRUE; 3496 3497 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3498 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3499 3500 /* Set A as diagonal portion of *mat */ 3501 maij->A = A; 3502 3503 nz = oi[m]; 3504 for (i=0; i<nz; i++) { 3505 col = oj[i]; 3506 oj[i] = garray[col]; 3507 } 3508 3509 /* Set Bnew as off-diagonal portion of *mat */ 3510 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3511 bnew = (Mat_SeqAIJ*)Bnew->data; 3512 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3513 maij->B = Bnew; 3514 3515 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3516 3517 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3518 b->free_a = PETSC_FALSE; 3519 b->free_ij = PETSC_FALSE; 3520 ierr = MatDestroy(&B);CHKERRQ(ierr); 3521 3522 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3523 bnew->free_a = PETSC_TRUE; 3524 bnew->free_ij = PETSC_TRUE; 3525 3526 /* condense columns of maij->B */ 3527 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3528 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3529 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3530 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3531 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3532 PetscFunctionReturn(0); 3533 } 3534 3535 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3536 
3537 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3538 { 3539 PetscErrorCode ierr; 3540 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3541 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3542 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3543 Mat M,Msub,B=a->B; 3544 MatScalar *aa; 3545 Mat_SeqAIJ *aij; 3546 PetscInt *garray = a->garray,*colsub,Ncols; 3547 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3548 IS iscol_sub,iscmap; 3549 const PetscInt *is_idx,*cmap; 3550 PetscBool allcolumns=PETSC_FALSE; 3551 MPI_Comm comm; 3552 3553 PetscFunctionBegin; 3554 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3555 3556 if (call == MAT_REUSE_MATRIX) { 3557 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3558 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3559 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3560 3561 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3562 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3563 3564 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3565 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3566 3567 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3568 3569 } else { /* call == MAT_INITIAL_MATRIX) */ 3570 PetscBool flg; 3571 3572 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3573 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3574 3575 /* (1) iscol -> nonscalable iscol_local */ 3576 /* Check for special case: each processor gets entire matrix columns */ 3577 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3578 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3579 if (allcolumns) { 3580 iscol_sub = iscol_local; 3581 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3582 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3583 3584 } else { 3585 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3586 PetscInt *idx,*cmap1,k; 3587 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3588 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3589 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3590 count = 0; 3591 k = 0; 3592 for (i=0; i<Ncols; i++) { 3593 j = is_idx[i]; 3594 if (j >= cstart && j < cend) { 3595 /* diagonal part of mat */ 3596 idx[count] = j; 3597 cmap1[count++] = i; /* column index in submat */ 3598 } else if (Bn) { 3599 /* off-diagonal part of mat */ 3600 if (j == garray[k]) { 3601 idx[count] = j; 3602 cmap1[count++] = i; /* column index in submat */ 3603 } else if (j > garray[k]) { 3604 while (j > garray[k] && k < Bn-1) k++; 3605 if (j == garray[k]) { 3606 idx[count] = j; 3607 cmap1[count++] = i; /* column index in submat */ 3608 } 3609 } 3610 } 3611 } 3612 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3613 3614 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3615 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3616 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3617 3618 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3619 } 3620 3621 /* (3) Create sequential Msub */ 3622 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3623 } 3624 3625 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3626 aij = (Mat_SeqAIJ*)(Msub)->data; 3627 ii = aij->i; 3628 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3629 3630 /* 3631 m - number of local rows 3632 Ncols - number of columns (same on all processors) 3633 rstart - first row in new global matrix generated 3634 */ 3635 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3636 3637 if (call == MAT_INITIAL_MATRIX) { 3638 /* (4) Create parallel newmat */ 3639 PetscMPIInt rank,size; 3640 PetscInt csize; 3641 3642 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3643 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3644 3645 /* 3646 Determine the number of non-zeros in the diagonal and off-diagonal 3647 portions of the matrix in order to do correct preallocation 3648 */ 3649 3650 /* first get start and end of "diagonal" columns */ 3651 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3652 if (csize == PETSC_DECIDE) { 3653 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3654 if (mglobal == Ncols) { /* square matrix */ 3655 nlocal = m; 3656 } else { 3657 nlocal = Ncols/size + ((Ncols % size) > rank); 3658 } 3659 } else { 3660 nlocal = csize; 3661 } 3662 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3663 rstart = rend - nlocal; 3664 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3665 3666 /* next, compute all the lengths */ 3667 jj = aij->j; 3668 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3669 olens = dlens + m; 3670 for (i=0; i<m; i++) { 3671 jend = ii[i+1] - ii[i]; 3672 olen = 0; 3673 dlen = 0; 3674 for (j=0; j<jend; j++) { 3675 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3676 else dlen++; 3677 jj++; 3678 } 3679 olens[i] = olen; 3680 dlens[i] = dlen; 3681 } 3682 3683 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3684 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3685 3686 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3687 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
    if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);

  jj = aij->j;
  aa = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
    jj += nz; aa += nz;
  }
  ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(colsub);CHKERRQ(ierr);

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
    ierr = MatDestroy(&Msub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);

    ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
    ierr = ISDestroy(&iscmap);CHKERRQ(ierr);

    if (iscol_local) {
      ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

/*
   Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
   on each process, and then the final result by concatenating the local matrices.
   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().

   Note: This requires a sequential iscol with all indices.
3752 */ 3753 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3754 { 3755 PetscErrorCode ierr; 3756 PetscMPIInt rank,size; 3757 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3758 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3759 Mat M,Mreuse; 3760 MatScalar *aa,*vwork; 3761 MPI_Comm comm; 3762 Mat_SeqAIJ *aij; 3763 PetscBool colflag,allcolumns=PETSC_FALSE; 3764 3765 PetscFunctionBegin; 3766 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3767 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3768 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3769 3770 /* Check for special case: each processor gets entire matrix columns */ 3771 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3772 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3773 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3774 3775 if (call == MAT_REUSE_MATRIX) { 3776 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3777 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3778 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3779 } else { 3780 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3781 } 3782 3783 /* 3784 m - number of local rows 3785 n - number of columns (same on all processors) 3786 rstart - first row in new global matrix generated 3787 */ 3788 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3789 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3790 if (call == MAT_INITIAL_MATRIX) { 3791 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3792 ii = aij->i; 3793 jj = aij->j; 3794 3795 /* 3796 Determine the number of non-zeros in the diagonal and off-diagonal 3797 portions of the matrix in order to do correct preallocation 3798 */ 3799 3800 /* first get start and end of "diagonal" columns */ 3801 if (csize == PETSC_DECIDE) { 3802 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3803 if (mglobal == n) { /* square matrix */ 3804 nlocal = m; 3805 } else { 3806 nlocal = n/size + ((n % size) > rank); 3807 } 3808 } else { 3809 nlocal = csize; 3810 } 3811 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3812 rstart = rend - nlocal; 3813 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3814 3815 /* next, compute all the lengths */ 3816 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3817 olens = dlens + m; 3818 for (i=0; i<m; i++) { 3819 jend = ii[i+1] - ii[i]; 3820 olen = 0; 3821 dlen = 0; 3822 for (j=0; j<jend; j++) { 3823 if (*jj < rstart || *jj >= rend) olen++; 3824 else dlen++; 3825 jj++; 3826 } 3827 olens[i] = olen; 3828 dlens[i] = dlen; 3829 } 3830 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3831 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3832 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3833 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3834 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3835 ierr = PetscFree(dlens);CHKERRQ(ierr); 3836 } else { 3837 PetscInt ml,nl; 3838 3839 M = *newmat; 3840 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3841 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3842 ierr = 
MatZeroEntries(M);CHKERRQ(ierr);
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1]- Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1]- Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1]- Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
   The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of v[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are indices into the local j array.

   The format used for the sparse matrix input is equivalent to a
   row-major ordering, i.e., for the following matrix, the input data expected is
   as shown:

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1 = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1 = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
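   For instance, a sketch of the corresponding call on proc1 (illustrative only;
   the d_nnz/o_nnz values are the ones derived below):

.vb
      PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};   /* proc1 values, see below */
      MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
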
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are indices into the local j array.
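
   As an illustrative sketch (the variable names are hypothetical), the call on
   process 0 of the two-process example shown next would be:

.vb
      PetscInt    i[] = {0,1,3}, j[] = {0,0,2};   /* CSR data for rows 0..1, see below */
      PetscScalar v[] = {1,2,3};
      Mat         A;
      MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve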
   The format used for the sparse matrix input is equivalent to a
   row-major ordering, i.e., for the following matrix, the input data expected is
   as shown:

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1 = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1 = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
   MatCreate(...,&A);
   MatSetType(A,MATMPIAIJ);
   MatSetSizes(A, m,n,M,N);
   MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.
   This division can be shown as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   over the local rows for each of the local DIAGONAL and OFF-DIAGONAL
   submatrices. In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
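   As a minimal call sketch for the example above (variable names are
   illustrative and error checking is elided; the d_nz/o_nz arguments are
   passed as 0 since the *_nnz arrays take precedence), proc1 would
   preallocate exactly with
.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     Mat      A;
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve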
4377 4378 Level: intermediate 4379 4380 .keywords: matrix, aij, compressed row, sparse, parallel 4381 4382 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4383 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4384 @*/ 4385 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4386 { 4387 PetscErrorCode ierr; 4388 PetscMPIInt size; 4389 4390 PetscFunctionBegin; 4391 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4392 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4393 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4394 if (size > 1) { 4395 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4396 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4397 } else { 4398 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4399 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4400 } 4401 PetscFunctionReturn(0); 4402 } 4403 4404 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4405 { 4406 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4407 PetscBool flg; 4408 PetscErrorCode ierr; 4409 4410 PetscFunctionBegin; 4411 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4412 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4413 if (Ad) *Ad = a->A; 4414 if (Ao) *Ao = a->B; 4415 if (colmap) *colmap = a->garray; 4416 PetscFunctionReturn(0); 4417 } 4418 4419 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4420 { 4421 PetscErrorCode ierr; 4422 PetscInt m,N,i,rstart,nnz,Ii; 4423 PetscInt *indx; 4424 PetscScalar *values; 4425 4426 PetscFunctionBegin; 4427 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4428 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4429 PetscInt *dnz,*onz,sum,bs,cbs; 4430 4431 if (n == PETSC_DECIDE) { 4432 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4433 } 4434 /* Check sum(n) = N */ 4435 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4436 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4437 4438 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4439 rstart -= m; 4440 4441 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4442 for (i=0; i<m; i++) { 4443 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4444 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4445 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4446 } 4447 4448 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4449 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4450 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4451 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4452 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4453 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4454 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4455 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4456 } 4457 4458 /* numeric phase */ 4459 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4460 for (i=0; i<m; i++) { 4461 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4462 Ii = i + rstart; 4463 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
4464 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4465 } 4466 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4467 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4468 PetscFunctionReturn(0); 4469 } 4470 4471 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4472 { 4473 PetscErrorCode ierr; 4474 PetscMPIInt rank; 4475 PetscInt m,N,i,rstart,nnz; 4476 size_t len; 4477 const PetscInt *indx; 4478 PetscViewer out; 4479 char *name; 4480 Mat B; 4481 const PetscScalar *values; 4482 4483 PetscFunctionBegin; 4484 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4485 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4486 /* Should this be the type of the diagonal block of A? */ 4487 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4488 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4489 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4490 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4491 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4492 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4493 for (i=0; i<m; i++) { 4494 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4495 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4496 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4497 } 4498 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4499 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4500 4501 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4502 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4503 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4504 sprintf(name,"%s.%d",outfile,rank); 4505 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4506 ierr = PetscFree(name);CHKERRQ(ierr); 4507 ierr = MatView(B,out);CHKERRQ(ierr); 4508 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4509 ierr = MatDestroy(&B);CHKERRQ(ierr); 4510 PetscFunctionReturn(0); 4511 } 4512 4513 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4514 { 4515 PetscErrorCode ierr; 4516 Mat_Merge_SeqsToMPI *merge; 4517 PetscContainer container; 4518 4519 PetscFunctionBegin; 4520 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4521 if (container) { 4522 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4523 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4524 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4525 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4526 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4527 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4528 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4529 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4530 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4531 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4532 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4533 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4534 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4535 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4536 ierr = PetscFree(merge);CHKERRQ(ierr); 4537 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4538 } 4539 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4540 PetscFunctionReturn(0); 4541 } 4542 4543 #include <../src/mat/utils/freespace.h> 4544 #include <petscbt.h> 4545 4546 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4547 { 4548 PetscErrorCode ierr; 4549 MPI_Comm comm; 4550 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4551 PetscMPIInt 
size,rank,taga,*len_s; 4552 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4553 PetscInt proc,m; 4554 PetscInt **buf_ri,**buf_rj; 4555 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4556 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4557 MPI_Request *s_waits,*r_waits; 4558 MPI_Status *status; 4559 MatScalar *aa=a->a; 4560 MatScalar **abuf_r,*ba_i; 4561 Mat_Merge_SeqsToMPI *merge; 4562 PetscContainer container; 4563 4564 PetscFunctionBegin; 4565 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4566 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4567 4568 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4569 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4570 4571 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4572 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4573 4574 bi = merge->bi; 4575 bj = merge->bj; 4576 buf_ri = merge->buf_ri; 4577 buf_rj = merge->buf_rj; 4578 4579 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4580 owners = merge->rowmap->range; 4581 len_s = merge->len_s; 4582 4583 /* send and recv matrix values */ 4584 /*-----------------------------*/ 4585 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4586 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4587 4588 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4589 for (proc=0,k=0; proc<size; proc++) { 4590 if (!len_s[proc]) continue; 4591 i = owners[proc]; 4592 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4593 k++; 4594 } 4595 4596 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4597 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4598 ierr = PetscFree(status);CHKERRQ(ierr); 4599 4600 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4601 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4602 4603 /* insert mat values of mpimat */ 4604 /*----------------------------*/ 4605 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4606 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4607 4608 for (k=0; k<merge->nrecv; k++) { 4609 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4610 nrows = *(buf_ri_k[k]); 4611 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4612 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4613 } 4614 4615 /* set values of ba */ 4616 m = merge->rowmap->n; 4617 for (i=0; i<m; i++) { 4618 arow = owners[rank] + i; 4619 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4620 bnzi = bi[i+1] - bi[i]; 4621 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4622 4623 /* add local non-zero vals of this proc's seqmat into ba */ 4624 anzi = ai[arow+1] - ai[arow]; 4625 aj = a->j + ai[arow]; 4626 aa = a->a + ai[arow]; 4627 nextaj = 0; 4628 for (j=0; nextaj<anzi; j++) { 4629 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4630 ba_i[j] += aa[nextaj++]; 4631 } 4632 } 4633 4634 /* add received vals into ba */ 4635 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4636 /* i-th row */ 4637 if (i == *nextrow[k]) { 4638 anzi = *(nextai[k]+1) - *nextai[k]; 4639 aj = buf_rj[k] + *(nextai[k]); 4640 aa = abuf_r[k] + *(nextai[k]); 4641 nextaj = 0; 4642 for (j=0; nextaj<anzi; j++) { 4643 if (*(bj_i + j) == aj[nextaj]) { /* bcol 
== acol */ 4644 ba_i[j] += aa[nextaj++]; 4645 } 4646 } 4647 nextrow[k]++; nextai[k]++; 4648 } 4649 } 4650 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4651 } 4652 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4653 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4654 4655 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4656 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4657 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4658 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4659 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4660 PetscFunctionReturn(0); 4661 } 4662 4663 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4664 { 4665 PetscErrorCode ierr; 4666 Mat B_mpi; 4667 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4668 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4669 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4670 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4671 PetscInt len,proc,*dnz,*onz,bs,cbs; 4672 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4673 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4674 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4675 MPI_Status *status; 4676 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4677 PetscBT lnkbt; 4678 Mat_Merge_SeqsToMPI *merge; 4679 PetscContainer container; 4680 4681 PetscFunctionBegin; 4682 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4683 4684 /* make sure it is a PETSc comm */ 4685 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4686 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4687 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4688 4689 ierr = PetscNew(&merge);CHKERRQ(ierr); 4690 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4691 4692 /* determine row ownership */ 4693 /*---------------------------------------------------------*/ 4694 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4695 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4696 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4697 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4698 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4699 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4700 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4701 4702 m = merge->rowmap->n; 4703 owners = merge->rowmap->range; 4704 4705 /* determine the number of messages to send, their lengths */ 4706 /*---------------------------------------------------------*/ 4707 len_s = merge->len_s; 4708 4709 len = 0; /* length of buf_si[] */ 4710 merge->nsend = 0; 4711 for (proc=0; proc<size; proc++) { 4712 len_si[proc] = 0; 4713 if (proc == rank) { 4714 len_s[proc] = 0; 4715 } else { 4716 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4717 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4718 } 4719 if (len_s[proc]) { 4720 merge->nsend++; 4721 nrows = 0; 4722 for (i=owners[proc]; i<owners[proc+1]; i++) { 4723 if (ai[i+1] > ai[i]) nrows++; 4724 } 4725 len_si[proc] = 2*(nrows+1); 4726 len += len_si[proc]; 4727 } 4728 } 4729 4730 /* determine the number and length of messages to receive for ij-structure */ 4731 /*-------------------------------------------------------------------------*/ 4732 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4733 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4734 4735 /* post the Irecv of j-structure */ 4736 /*-------------------------------*/ 4737 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4738 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4739 4740 /* post the Isend of j-structure */ 4741 /*--------------------------------*/ 4742 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4743 4744 for (proc=0, k=0; proc<size; proc++) { 4745 if (!len_s[proc]) continue; 4746 i = owners[proc]; 4747 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4748 k++; 4749 } 4750 4751 /* receives and sends of j-structure are complete */ 4752 /*------------------------------------------------*/ 4753 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4754 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4755 4756 /* send and recv i-structure */ 4757 /*---------------------------*/ 4758 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4759 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4760 4761 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4762 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4763 for (proc=0,k=0; proc<size; proc++) { 4764 if (!len_s[proc]) continue; 4765 /* form outgoing message for i-structure: 4766 buf_si[0]: nrows to be sent 4767 [1:nrows]: row index (global) 4768 [nrows+1:2*nrows+1]: i-structure index 4769 */ 4770 /*-------------------------------------------*/ 4771 nrows = len_si[proc]/2 - 1; 4772 buf_si_i = buf_si + nrows+1; 4773 buf_si[0] = nrows; 4774 buf_si_i[0] = 0; 4775 nrows = 0; 4776 for (i=owners[proc]; i<owners[proc+1]; i++) { 4777 anzi = ai[i+1] - ai[i]; 4778 if (anzi) { 4779 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4780 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4781 nrows++; 4782 } 4783 } 4784 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4785 k++; 4786 buf_si += len_si[proc]; 4787 } 4788 4789 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4790 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4791 4792 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4793 for (i=0; i<merge->nrecv; i++) { 4794 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4795 } 4796 4797 ierr = PetscFree(len_si);CHKERRQ(ierr); 4798 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4799 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4800 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4801 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4802 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4803 ierr = PetscFree(status);CHKERRQ(ierr); 4804 4805 /* compute a local seq matrix in each processor */ 4806 /*----------------------------------------------*/ 4807 /* allocate bi array and free space for accumulating nonzero column info */ 4808 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4809 bi[0] = 0; 4810 4811 /* create and initialize a linked list */ 4812 nlnk = N+1; 4813 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4814 4815 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4816 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4817 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4818 4819 current_space = free_space; 4820 4821 /* determine symbolic info for each local row */ 4822 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4823 4824 for (k=0; k<merge->nrecv; k++) { 4825 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4826 nrows = *buf_ri_k[k]; 4827 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4828 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4829 } 4830 4831 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4832 len = 0; 4833 for (i=0; i<m; i++) { 4834 bnzi = 0; 4835 /* add local non-zero cols of this proc's seqmat into lnk */ 4836 arow = owners[rank] + i; 4837 anzi = ai[arow+1] - ai[arow]; 4838 aj = a->j + ai[arow]; 4839 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4840 bnzi += nlnk; 4841 /* add received col data into lnk */ 4842 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4843 if (i == *nextrow[k]) { /* i-th row */ 4844 anzi = *(nextai[k]+1) - *nextai[k]; 4845 aj = buf_rj[k] + *nextai[k]; 4846 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4847 bnzi += nlnk; 4848 nextrow[k]++; nextai[k]++; 4849 } 4850 } 4851 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4852 4853 /* if free space is not available, make more free space */ 4854 if (current_space->local_remaining<bnzi) { 4855 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4856 nspacedouble++; 4857 } 4858 /* copy data into free space, then initialize lnk */ 4859 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4860 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4861 4862 current_space->array += bnzi; 4863 current_space->local_used += bnzi; 4864 current_space->local_remaining -= bnzi; 4865 4866 bi[i+1] = bi[i] + bnzi; 4867 } 4868 4869 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4870 4871 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4872 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4873 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4874 4875 /* create symbolic parallel matrix B_mpi */ 4876 /*---------------------------------------*/ 4877 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4878 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4879 if (n==PETSC_DECIDE) { 4880 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4881 } else { 4882 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4883 } 4884 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4885 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4886 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4887 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4888 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4889 4890 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4891 B_mpi->assembled = PETSC_FALSE; 4892 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4893 merge->bi = bi; 4894 merge->bj = bj; 4895 merge->buf_ri = buf_ri; 4896 merge->buf_rj = buf_rj; 4897 merge->coi = NULL; 4898 merge->coj = NULL; 4899 merge->owners_co = NULL; 4900 4901 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4902 4903 /* attach the 
supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding together sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (each process contributes one)
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix on each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
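     A minimal usage sketch (assuming every process has already created and
     assembled its own SeqAIJ matrix seqmat of identical global size; names
     are illustrative and error checking is elided):
.vb
     Mat seqmat,mpimat;
     /* ... each process fills and assembles its own seqmat ... */
     MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
.ve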
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows
     and putting them into a sequential matrix with mlocal rows and n columns, where mlocal is the row count
     obtained with MatGetLocalSize() and n is the global column count obtained with MatGetSize().

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;
  MPI_Comm       comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    if (size == 1) {
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }

    ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary.
*/ 5047 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5048 mat->free_a = PETSC_TRUE; 5049 mat->free_ij = PETSC_TRUE; 5050 mat->nonew = 0; 5051 } else if (scall == MAT_REUSE_MATRIX) { 5052 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5053 ci = mat->i; cj = mat->j; cam = mat->a; 5054 for (i=0; i<am; i++) { 5055 /* off-diagonal portion of A */ 5056 ncols_o = bi[i+1] - bi[i]; 5057 for (jo=0; jo<ncols_o; jo++) { 5058 col = cmap[*bj]; 5059 if (col >= cstart) break; 5060 *cam++ = *ba++; bj++; 5061 } 5062 /* diagonal portion of A */ 5063 ncols_d = ai[i+1] - ai[i]; 5064 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5065 /* off-diagonal portion of A */ 5066 for (j=jo; j<ncols_o; j++) { 5067 *cam++ = *ba++; bj++; 5068 } 5069 } 5070 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5071 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5072 PetscFunctionReturn(0); 5073 } 5074 5075 /*@C 5076 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5077 5078 Not Collective 5079 5080 Input Parameters: 5081 + A - the matrix 5082 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5083 - row, col - index sets of rows and columns to extract (or NULL) 5084 5085 Output Parameter: 5086 . A_loc - the local sequential matrix generated 5087 5088 Level: developer 5089 5090 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5091 5092 @*/ 5093 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5094 { 5095 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5096 PetscErrorCode ierr; 5097 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5098 IS isrowa,iscola; 5099 Mat *aloc; 5100 PetscBool match; 5101 5102 PetscFunctionBegin; 5103 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5104 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5105 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5106 if (!row) { 5107 start = A->rmap->rstart; end = A->rmap->rend; 5108 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5109 } else { 5110 isrowa = *row; 5111 } 5112 if (!col) { 5113 start = A->cmap->rstart; 5114 cmap = a->garray; 5115 nzA = a->A->cmap->n; 5116 nzB = a->B->cmap->n; 5117 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5118 ncols = 0; 5119 for (i=0; i<nzB; i++) { 5120 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5121 else break; 5122 } 5123 imark = i; 5124 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5125 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5126 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5127 } else { 5128 iscola = *col; 5129 } 5130 if (scall != MAT_INITIAL_MATRIX) { 5131 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5132 aloc[0] = *A_loc; 5133 } 5134 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5135 if (!col) { /* attach global id of condensed columns */ 5136 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5137 } 5138 *A_loc = aloc[0]; 5139 ierr = PetscFree(aloc);CHKERRQ(ierr); 5140 if (!row) { 5141 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5142 } 5143 if (!col) { 5144 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5145 } 5146 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5147 PetscFunctionReturn(0); 5148 } 5149 5150 /*@C 5151 MatGetBrowsOfAcols - Creates a 
SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5234 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5235 5236 Level: developer 5237 5238 */ 5239 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5240 { 5241 VecScatter_MPI_General *gen_to,*gen_from; 5242 PetscErrorCode ierr; 5243 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5244 Mat_SeqAIJ *b_oth; 5245 VecScatter ctx; 5246 MPI_Comm comm; 5247 PetscMPIInt *rprocs,*sprocs,tag,rank; 5248 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5249 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5250 PetscScalar *b_otha,*bufa,*bufA,*vals; 5251 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5252 MPI_Request *rwaits = NULL,*swaits = NULL; 5253 MPI_Status *sstatus,rstatus; 5254 PetscMPIInt jj,size; 5255 VecScatterType type; 5256 PetscBool mpi1; 5257 5258 PetscFunctionBegin; 5259 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5260 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5261 5262 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5263 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5264 } 5265 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5266 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5267 5268 if (size == 1) { 5269 startsj_s = NULL; 5270 bufa_ptr = NULL; 5271 *B_oth = NULL; 5272 PetscFunctionReturn(0); 5273 } 5274 5275 ctx = a->Mvctx; 5276 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5277 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5278 if (!mpi1) { 5279 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5280 thus create a->Mvctx_mpi1 */ 5281 if (!a->Mvctx_mpi1) { 5282 a->Mvctx_mpi1_flg = PETSC_TRUE; 5283 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5284 } 5285 ctx = a->Mvctx_mpi1; 5286 } 5287 tag = ((PetscObject)ctx)->tag; 5288 5289 gen_to = (VecScatter_MPI_General*)ctx->todata; 5290 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5291 nrecvs = gen_from->n; 5292 nsends = gen_to->n; 5293 5294 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5295 srow = gen_to->indices; /* local row index to be sent */ 5296 sstarts = gen_to->starts; 5297 sprocs = gen_to->procs; 5298 sstatus = gen_to->sstatus; 5299 sbs = gen_to->bs; 5300 rstarts = gen_from->starts; 5301 rprocs = gen_from->procs; 5302 rbs = gen_from->bs; 5303 5304 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5305 if (scall == MAT_INITIAL_MATRIX) { 5306 /* i-array */ 5307 /*---------*/ 5308 /* post receives */ 5309 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5310 for (i=0; i<nrecvs; i++) { 5311 rowlen = rvalues + rstarts[i]*rbs; 5312 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5313 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5314 } 5315 5316 /* pack the outgoing message */ 5317 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5318 5319 sstartsj[0] = 0; 5320 rstartsj[0] = 0; 5321 len = 0; /* total length of j or a array to be sent */ 5322 k = 0; 5323 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5324 for (i=0; i<nsends; i++) { 5325 rowlen = svalues + sstarts[i]*sbs; 5326 
nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
    ierr = PetscFree(svalues);CHKERRQ(ierr);

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    ierr = PetscFree(rvalues);CHKERRQ(ierr);

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length
of the msg received */ 5422 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5423 } 5424 5425 /* pack the outgoing message a-array */ 5426 k = 0; 5427 for (i=0; i<nsends; i++) { 5428 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5429 bufA = bufa+sstartsj[i]; 5430 for (j=0; j<nrows; j++) { 5431 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5432 for (ll=0; ll<sbs; ll++) { 5433 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5434 for (l=0; l<ncols; l++) { 5435 *bufA++ = vals[l]; 5436 } 5437 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5438 } 5439 } 5440 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5441 } 5442 /* recvs and sends of a-array are completed */ 5443 i = nrecvs; 5444 while (i--) { 5445 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5446 } 5447 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5448 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5449 5450 if (scall == MAT_INITIAL_MATRIX) { 5451 /* put together the new matrix */ 5452 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5453 5454 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5455 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5456 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5457 b_oth->free_a = PETSC_TRUE; 5458 b_oth->free_ij = PETSC_TRUE; 5459 b_oth->nonew = 0; 5460 5461 ierr = PetscFree(bufj);CHKERRQ(ierr); 5462 if (!startsj_s || !bufa_ptr) { 5463 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5464 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5465 } else { 5466 *startsj_s = sstartsj; 5467 *startsj_r = rstartsj; 5468 *bufa_ptr = bufa; 5469 } 5470 } 5471 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5472 PetscFunctionReturn(0); 5473 } 5474 5475 /*@C 5476 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5477 5478 Not Collective 5479 5480 Input Parameters: 5481 . A - The matrix in mpiaij format 5482 5483 Output Parameter: 5484 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5485 . 
colmap - A map from global column index to local index into lvec 5486 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5487 5488 Level: developer 5489 5490 @*/ 5491 #if defined(PETSC_USE_CTABLE) 5492 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5493 #else 5494 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5495 #endif 5496 { 5497 Mat_MPIAIJ *a; 5498 5499 PetscFunctionBegin; 5500 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5501 PetscValidPointer(lvec, 2); 5502 PetscValidPointer(colmap, 3); 5503 PetscValidPointer(multScatter, 4); 5504 a = (Mat_MPIAIJ*) A->data; 5505 if (lvec) *lvec = a->lvec; 5506 if (colmap) *colmap = a->colmap; 5507 if (multScatter) *multScatter = a->Mvctx; 5508 PetscFunctionReturn(0); 5509 } 5510 5511 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5512 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5513 #if defined(PETSC_HAVE_MKL_SPARSE) 5514 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5515 #endif 5516 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5517 #if defined(PETSC_HAVE_ELEMENTAL) 5518 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5519 #endif 5520 #if defined(PETSC_HAVE_HYPRE) 5521 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5522 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5523 #endif 5524 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5525 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5526 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5527 5528 /* 5529 Computes (B'*A')' since computing B*A directly is untenable 5530 5531 n p p 5532 ( ) ( ) ( ) 5533 m ( A ) * n ( B ) = m ( C ) 5534 ( ) ( ) ( ) 5535 5536 */ 5537 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5538 { 5539 PetscErrorCode ierr; 5540 Mat At,Bt,Ct; 5541 5542 PetscFunctionBegin; 5543 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5544 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5545 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5546 ierr = MatDestroy(&At);CHKERRQ(ierr); 5547 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5548 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5549 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5550 PetscFunctionReturn(0); 5551 } 5552 5553 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5554 { 5555 PetscErrorCode ierr; 5556 PetscInt m=A->rmap->n,n=B->cmap->n; 5557 Mat Cmat; 5558 5559 PetscFunctionBegin; 5560 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5561 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5562 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5563 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5564 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5565 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5566 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5567 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5568 5569 Cmat->ops->matmultnumeric = 
MatMatMultNumeric_MPIDense_MPIAIJ; 5570 5571 *C = Cmat; 5572 PetscFunctionReturn(0); 5573 } 5574 5575 /* ----------------------------------------------------------------*/ 5576 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5577 { 5578 PetscErrorCode ierr; 5579 5580 PetscFunctionBegin; 5581 if (scall == MAT_INITIAL_MATRIX) { 5582 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5583 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5584 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5585 } 5586 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5587 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5588 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5589 PetscFunctionReturn(0); 5590 } 5591 5592 /*MC 5593 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5594 5595 Options Database Keys: 5596 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5597 5598 Level: beginner 5599 5600 .seealso: MatCreateAIJ() 5601 M*/ 5602 5603 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5604 { 5605 Mat_MPIAIJ *b; 5606 PetscErrorCode ierr; 5607 PetscMPIInt size; 5608 5609 PetscFunctionBegin; 5610 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5611 5612 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5613 B->data = (void*)b; 5614 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5615 B->assembled = PETSC_FALSE; 5616 B->insertmode = NOT_SET_VALUES; 5617 b->size = size; 5618 5619 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5620 5621 /* build cache for off array entries formed */ 5622 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5623 5624 b->donotstash = PETSC_FALSE; 5625 b->colmap = 0; 5626 b->garray = 0; 5627 b->roworiented = PETSC_TRUE; 5628 5629 /* stuff used for matrix vector multiply */ 5630 b->lvec = NULL; 5631 b->Mvctx = NULL; 5632 5633 /* stuff for MatGetRow() */ 5634 b->rowindices = 0; 5635 b->rowvalues = 0; 5636 b->getrowactive = PETSC_FALSE; 5637 5638 /* flexible pointer used in CUSP/CUSPARSE classes */ 5639 b->spptr = NULL; 5640 5641 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5642 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5643 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5644 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5645 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5646 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5647 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5648 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5649 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5650 #if defined(PETSC_HAVE_MKL_SPARSE) 5651 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5652 #endif 5653 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5654 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5655 #if defined(PETSC_HAVE_ELEMENTAL) 5656 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5657 #endif 5658 #if defined(PETSC_HAVE_HYPRE) 5659 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5660 #endif 5661 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5662 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5664 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5665 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5666 #if defined(PETSC_HAVE_HYPRE) 5667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5668 #endif 5669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5670 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5671 PetscFunctionReturn(0); 5672 } 5673 5674 /*@C 5675 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5676 and "off-diagonal" part of the matrix in CSR format. 5677 5678 Collective on MPI_Comm 5679 5680 Input Parameters: 5681 + comm - MPI communicator 5682 . m - number of local rows (Cannot be PETSC_DECIDE) 5683 . n - This value should be the same as the local size used in creating the 5684 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5685 calculated if N is given) For square matrices n is almost always m. 5686 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5687 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5688 . i - row indices for "diagonal" portion of matrix 5689 . j - column indices 5690 . a - matrix values 5691 . oi - row indices for "off-diagonal" portion of matrix 5692 . oj - column indices 5693 - oa - matrix values 5694 5695 Output Parameter: 5696 . mat - the matrix 5697 5698 Level: advanced 5699 5700 Notes: 5701 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5702 must free the arrays once the matrix has been destroyed and not before. 5703 5704 The i and j indices are 0 based 5705 5706 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5707 5708 This sets local rows and cannot be used to set off-processor values. 5709 5710 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5711 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5712 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 5713 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5714 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5715 communication if it is known that only local entries will be set. 5716 5717 .keywords: matrix, aij, compressed row, sparse, parallel 5718 5719 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5720 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5721 @*/ 5722 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5723 { 5724 PetscErrorCode ierr; 5725 Mat_MPIAIJ *maij; 5726 5727 PetscFunctionBegin; 5728 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5729 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5730 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5731 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5732 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5733 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5734 maij = (Mat_MPIAIJ*) (*mat)->data; 5735 5736 (*mat)->preallocated = PETSC_TRUE; 5737 5738 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5739 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5740 5741 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5742 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5743 5744 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5745 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5746 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5747 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5748 5749 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5750 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5751 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5752 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5753 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5754 PetscFunctionReturn(0); 5755 } 5756 5757 /* 5758 Special version for direct calls from Fortran 5759 */ 5760 #include <petsc/private/fortranimpl.h> 5761 5762 /* Change these macros so can be used in void function */ 5763 #undef CHKERRQ 5764 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5765 #undef SETERRQ2 5766 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5767 #undef SETERRQ3 5768 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5769 #undef SETERRQ 5770 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5771 5772 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5773 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5774 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5775 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5776 #else 5777 #endif 5778 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5779 { 5780 Mat mat = *mmat; 5781 PetscInt m = *mm, n = *mn; 5782 InsertMode addv = 
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   note ba must be refreshed before ap2 is computed, since disassembly
                   replaced B and its value array */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
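            /* insert into the off-diagonal block B: at this point col is either B's
               compacted local column index (found through the colmap above) or the
               global column index if the matrix was just disassembled or has never
               been assembled */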
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}