#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type also
    automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
114 } 115 ok2:; 116 } 117 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 122 { 123 PetscErrorCode ierr; 124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 125 PetscBool cong; 126 127 PetscFunctionBegin; 128 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 129 if (Y->assembled && cong) { 130 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 131 } else { 132 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 133 } 134 PetscFunctionReturn(0); 135 } 136 137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 138 { 139 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 140 PetscErrorCode ierr; 141 PetscInt i,rstart,nrows,*rows; 142 143 PetscFunctionBegin; 144 *zrows = NULL; 145 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 146 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 147 for (i=0; i<nrows; i++) rows[i] += rstart; 148 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 153 { 154 PetscErrorCode ierr; 155 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 156 PetscInt i,n,*garray = aij->garray; 157 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 158 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 159 PetscReal *work; 160 161 PetscFunctionBegin; 162 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 163 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 164 if (type == NORM_2) { 165 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 166 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 167 } 168 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 169 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 170 } 171 } else if (type == NORM_1) { 172 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 173 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 174 } 175 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 176 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 177 } 178 } else if (type == NORM_INFINITY) { 179 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 180 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 181 } 182 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 183 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 184 } 185 186 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 187 if (type == NORM_INFINITY) { 188 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 189 } else { 190 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 191 } 192 ierr = PetscFree(work);CHKERRQ(ierr); 193 if (type == NORM_2) { 194 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 195 } 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 200 { 201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 202 IS sis,gis; 203 PetscErrorCode ierr; 204 const PetscInt *isis,*igis; 205 PetscInt n,*iis,nsis,ngis,rstart,i; 206 207 PetscFunctionBegin; 208 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 209 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 210 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 211 ierr = 
ISGetSize(sis,&nsis);CHKERRQ(ierr); 212 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 213 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 214 215 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 216 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 217 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 218 n = ngis + nsis; 219 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 220 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 221 for (i=0; i<n; i++) iis[i] += rstart; 222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 223 224 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 225 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 226 ierr = ISDestroy(&sis);CHKERRQ(ierr); 227 ierr = ISDestroy(&gis);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 /* 232 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 233 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 234 235 Only for square matrices 236 237 Used by a preconditioner, hence PETSC_EXTERN 238 */ 239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 240 { 241 PetscMPIInt rank,size; 242 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 243 PetscErrorCode ierr; 244 Mat mat; 245 Mat_SeqAIJ *gmata; 246 PetscMPIInt tag; 247 MPI_Status status; 248 PetscBool aij; 249 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 254 if (!rank) { 255 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 256 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 257 } 258 if (reuse == MAT_INITIAL_MATRIX) { 259 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 260 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 261 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 262 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 263 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 264 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 265 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 266 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 267 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 268 269 rowners[0] = 0; 270 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 271 rstart = rowners[rank]; 272 rend = rowners[rank+1]; 273 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 274 if (!rank) { 275 gmata = (Mat_SeqAIJ*) gmat->data; 276 /* send row lengths to all processors */ 277 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 278 for (i=1; i<size; i++) { 279 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 280 } 281 /* determine number diagonal and off-diagonal counts */ 282 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 283 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 284 jj = 0; 285 for (i=0; i<m; i++) { 286 for (j=0; j<dlens[i]; j++) { 287 if (gmata->j[jj] < rstart) ld[i]++; 288 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 289 jj++; 290 } 291 } 292 /* send column indices to other processes */ 293 for (i=1; i<size; i++) { 294 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 295 ierr = 
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 296 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 } 298 299 /* send numerical values to other processes */ 300 for (i=1; i<size; i++) { 301 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 302 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 303 } 304 gmataa = gmata->a; 305 gmataj = gmata->j; 306 307 } else { 308 /* receive row lengths */ 309 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* receive column indices */ 311 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 312 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 313 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 314 /* determine number diagonal and off-diagonal counts */ 315 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 316 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 317 jj = 0; 318 for (i=0; i<m; i++) { 319 for (j=0; j<dlens[i]; j++) { 320 if (gmataj[jj] < rstart) ld[i]++; 321 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 322 jj++; 323 } 324 } 325 /* receive numerical values */ 326 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 327 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 328 } 329 /* set preallocation */ 330 for (i=0; i<m; i++) { 331 dlens[i] -= olens[i]; 332 } 333 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 334 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 335 336 for (i=0; i<m; i++) { 337 dlens[i] += olens[i]; 338 } 339 cnt = 0; 340 for (i=0; i<m; i++) { 341 row = rstart + i; 342 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 343 cnt += dlens[i]; 344 } 345 if (rank) { 346 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 347 } 348 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 349 ierr = PetscFree(rowners);CHKERRQ(ierr); 350 351 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 352 353 *inmat = mat; 354 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 355 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 356 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 357 mat = *inmat; 358 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 359 if (!rank) { 360 /* send numerical values to other processes */ 361 gmata = (Mat_SeqAIJ*) gmat->data; 362 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 363 gmataa = gmata->a; 364 for (i=1; i<size; i++) { 365 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 366 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 367 } 368 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 369 } else { 370 /* receive numerical values from process 0*/ 371 nz = Ad->nz + Ao->nz; 372 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 373 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 374 } 375 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 376 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 377 ad = Ad->a; 378 ao = Ao->a; 379 if (mat->rmap->n) { 380 i = 0; 381 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 for (i=1; i<mat->rmap->n; i++) { 
385 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 386 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 387 } 388 i--; 389 if (mat->rmap->n) { 390 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 391 } 392 if (rank) { 393 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 394 } 395 } 396 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 398 PetscFunctionReturn(0); 399 } 400 401 /* 402 Local utility routine that creates a mapping from the global column 403 number to the local number in the off-diagonal part of the local 404 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 405 a slightly higher hash table cost; without it it is not scalable (each processor 406 has an order N integer array but is fast to acess. 407 */ 408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt n = aij->B->cmap->n,i; 413 414 PetscFunctionBegin; 415 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 416 #if defined(PETSC_USE_CTABLE) 417 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 418 for (i=0; i<n; i++) { 419 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 420 } 421 #else 422 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 423 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 424 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 425 #endif 426 PetscFunctionReturn(0); 427 } 428 429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 430 { \ 431 if (col <= lastcol1) low1 = 0; \ 432 else high1 = nrow1; \ 433 lastcol1 = col;\ 434 while (high1-low1 > 5) { \ 435 t = (low1+high1)/2; \ 436 if (rp1[t] > col) high1 = t; \ 437 else low1 = t; \ 438 } \ 439 for (_i=low1; _i<high1; _i++) { \ 440 if (rp1[_i] > col) break; \ 441 if (rp1[_i] == col) { \ 442 if (addv == ADD_VALUES) ap1[_i] += value; \ 443 else ap1[_i] = value; \ 444 goto a_noinsert; \ 445 } \ 446 } \ 447 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 448 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 449 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 450 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 451 N = nrow1++ - 1; a->nz++; high1++; \ 452 /* shift up all the later entries in this row */ \ 453 for (ii=N; ii>=_i; ii--) { \ 454 rp1[ii+1] = rp1[ii]; \ 455 ap1[ii+1] = ap1[ii]; \ 456 } \ 457 rp1[_i] = col; \ 458 ap1[_i] = value; \ 459 A->nonzerostate++;\ 460 a_noinsert: ; \ 461 ailen[row] = nrow1; \ 462 } 463 464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 465 { \ 466 if (col <= lastcol2) low2 = 0; \ 467 else high2 = nrow2; \ 468 lastcol2 = col; \ 469 while (high2-low2 > 5) { \ 470 t = (low2+high2)/2; \ 471 if (rp2[t] > col) high2 = t; \ 472 else low2 = t; \ 473 } \ 474 for (_i=low2; _i<high2; _i++) { \ 475 if (rp2[_i] > col) break; \ 476 if (rp2[_i] == col) { \ 477 if (addv == ADD_VALUES) ap2[_i] += value; \ 478 else ap2[_i] 
= value; \ 479 goto b_noinsert; \ 480 } \ 481 } \ 482 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 484 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 485 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 486 N = nrow2++ - 1; b->nz++; high2++; \ 487 /* shift up all the later entries in this row */ \ 488 for (ii=N; ii>=_i; ii--) { \ 489 rp2[ii+1] = rp2[ii]; \ 490 ap2[ii+1] = ap2[ii]; \ 491 } \ 492 rp2[_i] = col; \ 493 ap2[_i] = value; \ 494 B->nonzerostate++; \ 495 b_noinsert: ; \ 496 bilen[row] = nrow2; \ 497 } 498 499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 500 { 501 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 502 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 503 PetscErrorCode ierr; 504 PetscInt l,*garray = mat->garray,diag; 505 506 PetscFunctionBegin; 507 /* code only works for square matrices A */ 508 509 /* find size of row to the left of the diagonal part */ 510 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 511 row = row - diag; 512 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 513 if (garray[b->j[b->i[row]+l]] > diag) break; 514 } 515 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* diagonal part */ 518 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 519 520 /* right of diagonal part */ 521 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 526 { 527 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 528 PetscScalar value; 529 PetscErrorCode ierr; 530 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 531 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 532 PetscBool roworiented = aij->roworiented; 533 534 /* Some Variables required in the macro */ 535 Mat A = aij->A; 536 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 537 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 538 MatScalar *aa = a->a; 539 PetscBool ignorezeroentries = a->ignorezeroentries; 540 Mat B = aij->B; 541 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 542 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 543 MatScalar *ba = b->a; 544 545 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 546 PetscInt nonew; 547 MatScalar *ap1,*ap2; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (im[i] < 0) continue; 552 #if defined(PETSC_USE_DEBUG) 553 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 554 #endif 555 if (im[i] >= rstart && im[i] < rend) { 556 row = im[i] - rstart; 557 lastcol1 = -1; 558 rp1 = aj + ai[row]; 559 ap1 = aa + ai[row]; 560 rmax1 = aimax[row]; 561 nrow1 = ailen[row]; 562 low1 = 0; 563 high1 = nrow1; 564 lastcol2 = -1; 565 rp2 = bj + bi[row]; 566 ap2 = ba + bi[row]; 567 rmax2 = bimax[row]; 568 nrow2 = bilen[row]; 569 low2 = 0; 570 high2 = nrow2; 571 572 for (j=0; j<n; j++) { 573 if (roworiented) value = v[i*n+j]; 574 else value = v[i+j*m]; 
575 if (in[j] >= cstart && in[j] < cend) { 576 col = in[j] - cstart; 577 nonew = a->nonew; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 579 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 580 } else if (in[j] < 0) continue; 581 #if defined(PETSC_USE_DEBUG) 582 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 583 #endif 584 else { 585 if (mat->was_assembled) { 586 if (!aij->colmap) { 587 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 #if defined(PETSC_USE_CTABLE) 590 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 591 col--; 592 #else 593 col = aij->colmap[in[j]] - 1; 594 #endif 595 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 596 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 597 col = in[j]; 598 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 599 B = aij->B; 600 b = (Mat_SeqAIJ*)B->data; 601 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 602 rp2 = bj + bi[row]; 603 ap2 = ba + bi[row]; 604 rmax2 = bimax[row]; 605 nrow2 = bilen[row]; 606 low2 = 0; 607 high2 = nrow2; 608 bm = aij->B->rmap->n; 609 ba = b->a; 610 } else if (col < 0) { 611 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 612 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 613 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } 615 } else col = in[j]; 616 nonew = b->nonew; 617 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 618 } 619 } 620 } else { 621 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 622 if (!aij->donotstash) { 623 mat->assembled = PETSC_FALSE; 624 if (roworiented) { 625 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 626 } else { 627 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 628 } 629 } 630 } 631 } 632 PetscFunctionReturn(0); 633 } 634 635 /* 636 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 637 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 638 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
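  (Only the column-index (j) and row-length (ilen) arrays of the diagonal and off-diagonal blocks are
  filled here; the numerical values are copied separately by MatSetValues_MPIAIJ_CopyFromCSRFormat().
  As an illustrative sketch with made-up numbers: if this process owns columns cstart=10 to cend=20 and a
  row has global columns {3, 12, 17, 25}, then 12 and 17 go into the diagonal block with local columns
  2 and 7, while 3 and 25 go into the off-diagonal block with their global column numbers, to be
  compacted later by MatSetUpMultiply_MPIAIJ().)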
 */
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A    = aij->A; /* diagonal part of the matrix */
  Mat        B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all nonzero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If the column is in the diagonal block */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more general MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ*)mat->data;
  Mat        A     = aij->A; /* diagonal part of the matrix */
  Mat        B     = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
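     They are the row-start offsets (the i arrays) of the already preallocated diagonal and off-diagonal
     blocks, so each row j below is written starting at full_diag_i[j] and full_offd_i[j] respectively.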
*/ 695 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 696 PetscScalar *aa = a->a,*ba = b->a; 697 698 PetscFunctionBegin; 699 /* Iterate over all rows of the matrix */ 700 for (j=0; j<am; j++) { 701 dnz_row = onz_row = 0; 702 rowstart_offd = full_offd_i[j]; 703 rowstart_diag = full_diag_i[j]; 704 /* Iterate over all non-zero columns of the current row */ 705 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 706 /* If column is in the diagonal */ 707 if (mat_j[col] >= cstart && mat_j[col] < cend) { 708 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 709 aa[rowstart_diag+dnz_row] = mat_a[col]; 710 dnz_row++; 711 } else { /* off-diagonal entries */ 712 bj[rowstart_offd+onz_row] = mat_j[col]; 713 ba[rowstart_offd+onz_row] = mat_a[col]; 714 onz_row++; 715 } 716 } 717 ailen[j] = dnz_row; 718 bilen[j] = onz_row; 719 } 720 PetscFunctionReturn(0); 721 } 722 723 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 724 { 725 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 726 PetscErrorCode ierr; 727 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 728 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 729 730 PetscFunctionBegin; 731 for (i=0; i<m; i++) { 732 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 733 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 734 if (idxm[i] >= rstart && idxm[i] < rend) { 735 row = idxm[i] - rstart; 736 for (j=0; j<n; j++) { 737 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 738 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 739 if (idxn[j] >= cstart && idxn[j] < cend) { 740 col = idxn[j] - cstart; 741 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 742 } else { 743 if (!aij->colmap) { 744 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 745 } 746 #if defined(PETSC_USE_CTABLE) 747 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 748 col--; 749 #else 750 col = aij->colmap[idxn[j]] - 1; 751 #endif 752 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 753 else { 754 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 755 } 756 } 757 } 758 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 759 } 760 PetscFunctionReturn(0); 761 } 762 763 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 764 765 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 766 { 767 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 768 PetscErrorCode ierr; 769 PetscInt nstash,reallocs; 770 771 PetscFunctionBegin; 772 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 773 774 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 775 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 776 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 777 PetscFunctionReturn(0); 778 } 779 780 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 781 { 782 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 783 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 784 PetscErrorCode ierr; 785 PetscMPIInt n; 786 PetscInt i,j,rstart,ncols,flg; 787 PetscInt *row,*col; 788 
PetscBool other_disassembled; 789 PetscScalar *val; 790 791 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 792 793 PetscFunctionBegin; 794 if (!aij->donotstash && !mat->nooffprocentries) { 795 while (1) { 796 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 797 if (!flg) break; 798 799 for (i=0; i<n; ) { 800 /* Now identify the consecutive vals belonging to the same row */ 801 for (j=i,rstart=row[j]; j<n; j++) { 802 if (row[j] != rstart) break; 803 } 804 if (j < n) ncols = j-i; 805 else ncols = n-i; 806 /* Now assemble all these values with a single function call */ 807 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 808 809 i = j; 810 } 811 } 812 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 813 } 814 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 815 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 816 817 /* determine if any processor has disassembled, if so we must 818 also disassemble ourselfs, in order that we may reassemble. */ 819 /* 820 if nonzero structure of submatrix B cannot change then we know that 821 no processor disassembled thus we can skip this stuff 822 */ 823 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 824 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 825 if (mat->was_assembled && !other_disassembled) { 826 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 827 } 828 } 829 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 830 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 831 } 832 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 833 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 834 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 835 836 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 837 838 aij->rowvalues = 0; 839 840 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 841 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 842 843 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 844 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 845 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 846 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 847 } 848 PetscFunctionReturn(0); 849 } 850 851 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 852 { 853 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 854 PetscErrorCode ierr; 855 856 PetscFunctionBegin; 857 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 858 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 863 { 864 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 865 PetscInt *lrows; 866 PetscInt r, len; 867 PetscBool cong; 868 PetscErrorCode ierr; 869 870 PetscFunctionBegin; 871 /* get locally owned rows */ 872 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 873 /* fix right hand side if needed */ 874 if (x && b) { 875 const PetscScalar *xx; 876 PetscScalar *bb; 877 878 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 879 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 880 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 881 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 882 ierr = 
VecRestoreArray(b, &bb);CHKERRQ(ierr); 883 } 884 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 885 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 886 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 887 if ((diag != 0.0) && cong) { 888 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 889 } else if (diag != 0.0) { 890 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 891 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 892 for (r = 0; r < len; ++r) { 893 const PetscInt row = lrows[r] + A->rmap->rstart; 894 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 895 } 896 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 897 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 898 } else { 899 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 900 } 901 ierr = PetscFree(lrows);CHKERRQ(ierr); 902 903 /* only change matrix nonzero state if pattern was allowed to be changed */ 904 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 905 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 906 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 907 } 908 PetscFunctionReturn(0); 909 } 910 911 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 912 { 913 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 914 PetscErrorCode ierr; 915 PetscMPIInt n = A->rmap->n; 916 PetscInt i,j,r,m,p = 0,len = 0; 917 PetscInt *lrows,*owners = A->rmap->range; 918 PetscSFNode *rrows; 919 PetscSF sf; 920 const PetscScalar *xx; 921 PetscScalar *bb,*mask; 922 Vec xmask,lmask; 923 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 924 const PetscInt *aj, *ii,*ridx; 925 PetscScalar *aa; 926 927 PetscFunctionBegin; 928 /* Create SF where leaves are input rows and roots are owned rows */ 929 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 930 for (r = 0; r < n; ++r) lrows[r] = -1; 931 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 932 for (r = 0; r < N; ++r) { 933 const PetscInt idx = rows[r]; 934 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 935 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 936 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 937 } 938 rrows[r].rank = p; 939 rrows[r].index = rows[r] - owners[p]; 940 } 941 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 942 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 943 /* Collect flags for rows to be zeroed */ 944 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 945 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 946 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 947 /* Compress and put in row numbers */ 948 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 949 /* zero diagonal part of matrix */ 950 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 951 /* handle off diagonal part of matrix */ 952 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 953 ierr 
= VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 954 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 955 for (i=0; i<len; i++) bb[lrows[i]] = 1; 956 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 957 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 958 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 960 if (x) { 961 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 962 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 964 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 965 } 966 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 967 /* remove zeroed rows of off diagonal matrix */ 968 ii = aij->i; 969 for (i=0; i<len; i++) { 970 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 971 } 972 /* loop over all elements of off process part of matrix zeroing removed columns*/ 973 if (aij->compressedrow.use) { 974 m = aij->compressedrow.nrows; 975 ii = aij->compressedrow.i; 976 ridx = aij->compressedrow.rindex; 977 for (i=0; i<m; i++) { 978 n = ii[i+1] - ii[i]; 979 aj = aij->j + ii[i]; 980 aa = aij->a + ii[i]; 981 982 for (j=0; j<n; j++) { 983 if (PetscAbsScalar(mask[*aj])) { 984 if (b) bb[*ridx] -= *aa*xx[*aj]; 985 *aa = 0.0; 986 } 987 aa++; 988 aj++; 989 } 990 ridx++; 991 } 992 } else { /* do not use compressed row format */ 993 m = l->B->rmap->n; 994 for (i=0; i<m; i++) { 995 n = ii[i+1] - ii[i]; 996 aj = aij->j + ii[i]; 997 aa = aij->a + ii[i]; 998 for (j=0; j<n; j++) { 999 if (PetscAbsScalar(mask[*aj])) { 1000 if (b) bb[i] -= *aa*xx[*aj]; 1001 *aa = 0.0; 1002 } 1003 aa++; 1004 aj++; 1005 } 1006 } 1007 } 1008 if (x) { 1009 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1010 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1011 } 1012 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1013 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1014 ierr = PetscFree(lrows);CHKERRQ(ierr); 1015 1016 /* only change matrix nonzero state if pattern was allowed to be changed */ 1017 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1018 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1019 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1020 } 1021 PetscFunctionReturn(0); 1022 } 1023 1024 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1025 { 1026 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1027 PetscErrorCode ierr; 1028 PetscInt nt; 1029 VecScatter Mvctx = a->Mvctx; 1030 1031 PetscFunctionBegin; 1032 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1033 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1034 1035 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1037 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1038 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1039 PetscFunctionReturn(0); 1040 } 1041 1042 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1043 { 1044 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1045 PetscErrorCode ierr; 1046 1047 PetscFunctionBegin; 1048 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatMultAdd_MPIAIJ(Mat 
A,Vec xx,Vec yy,Vec zz) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1055 PetscErrorCode ierr; 1056 VecScatter Mvctx = a->Mvctx; 1057 1058 PetscFunctionBegin; 1059 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1060 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1062 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1063 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1068 { 1069 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1070 PetscErrorCode ierr; 1071 PetscBool merged; 1072 1073 PetscFunctionBegin; 1074 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1075 /* do nondiagonal part */ 1076 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1077 if (!merged) { 1078 /* send it on its way */ 1079 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1080 /* do local part */ 1081 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1082 /* receive remote parts: note this assumes the values are not actually */ 1083 /* added in yy until the next line, */ 1084 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1085 } else { 1086 /* do local part */ 1087 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1088 /* send it on its way */ 1089 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1090 /* values actually were received in the Begin() but we need to call this nop */ 1091 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1092 } 1093 PetscFunctionReturn(0); 1094 } 1095 1096 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1097 { 1098 MPI_Comm comm; 1099 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1100 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1101 IS Me,Notme; 1102 PetscErrorCode ierr; 1103 PetscInt M,N,first,last,*notme,i; 1104 PetscMPIInt size; 1105 1106 PetscFunctionBegin; 1107 /* Easy test: symmetric diagonal block */ 1108 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1109 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1110 if (!*f) PetscFunctionReturn(0); 1111 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1112 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1113 if (size == 1) PetscFunctionReturn(0); 1114 1115 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
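     Sketch of the test below: Me is the index set of locally owned rows [first,last), Notme holds every
     other global index; the blocks Aoff = Amat(Me,Notme) and Boff = Bmat(Notme,Me) are extracted with
     MatCreateSubMatrices() and then compared entrywise with MatIsTranspose().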
*/ 1116 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1117 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1118 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1119 for (i=0; i<first; i++) notme[i] = i; 1120 for (i=last; i<M; i++) notme[i-last+first] = i; 1121 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1122 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1123 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1124 Aoff = Aoffs[0]; 1125 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1126 Boff = Boffs[0]; 1127 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1128 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1129 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1130 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1131 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1132 ierr = PetscFree(notme);CHKERRQ(ierr); 1133 PetscFunctionReturn(0); 1134 } 1135 1136 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1137 { 1138 PetscErrorCode ierr; 1139 1140 PetscFunctionBegin; 1141 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1146 { 1147 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 /* do nondiagonal part */ 1152 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1153 /* send it on its way */ 1154 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1155 /* do local part */ 1156 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1157 /* receive remote parts */ 1158 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 /* 1163 This only works correctly for square matrices where the subblock A->A is the 1164 diagonal block 1165 */ 1166 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1167 { 1168 PetscErrorCode ierr; 1169 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1170 1171 PetscFunctionBegin; 1172 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1173 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1174 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1179 { 1180 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1181 PetscErrorCode ierr; 1182 1183 PetscFunctionBegin; 1184 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1185 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1186 PetscFunctionReturn(0); 1187 } 1188 1189 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1190 { 1191 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1192 PetscErrorCode ierr; 1193 1194 PetscFunctionBegin; 1195 #if defined(PETSC_USE_LOG) 1196 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1197 #endif 1198 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1199 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1200 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1201 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1202 #if defined(PETSC_USE_CTABLE) 1203 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1204 #else 1205 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1206 #endif 1207 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1208 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1209 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1210 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1211 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1212 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1213 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1214 1215 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1224 #if defined(PETSC_HAVE_ELEMENTAL) 1225 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1226 #endif 1227 #if defined(PETSC_HAVE_HYPRE) 1228 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1230 #endif 1231 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1237 { 1238 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1239 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1240 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1241 PetscErrorCode ierr; 1242 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1243 int fd; 1244 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1245 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1246 PetscScalar *column_values; 1247 PetscInt message_count,flowcontrolcount; 1248 FILE *file; 1249 1250 PetscFunctionBegin; 1251 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1252 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1253 nz = A->nz + B->nz; 1254 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1255 if (!rank) { 1256 header[0] = MAT_FILE_CLASSID; 1257 header[1] = mat->rmap->N; 1258 header[2] = mat->cmap->N; 1259 1260 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1261 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1262 /* get largest number of rows any processor has */ 1263 rlen = mat->rmap->n; 1264 range = mat->rmap->range; 1265 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1266 } else { 1267 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1268 rlen = mat->rmap->n; 1269 } 1270 1271 /* 
load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for
(j=B->i[i]; j<B->i[i+1]; j++) { 1334 if (garray[B->j[j]] > cstart) break; 1335 column_values[cnt++] = B->a[j]; 1336 } 1337 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1338 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1339 } 1340 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1341 1342 /* store the column values to the file */ 1343 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1344 if (!rank) { 1345 MPI_Status status; 1346 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1347 for (i=1; i<size; i++) { 1348 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1349 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1350 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1351 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1352 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1353 } 1354 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1355 } else { 1356 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1357 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1358 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1359 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1360 } 1361 ierr = PetscFree(column_values);CHKERRQ(ierr); 1362 1363 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1364 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #include <petscdraw.h> 1369 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1370 { 1371 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1372 PetscErrorCode ierr; 1373 PetscMPIInt rank = aij->rank,size = aij->size; 1374 PetscBool isdraw,iascii,isbinary; 1375 PetscViewer sviewer; 1376 PetscViewerFormat format; 1377 1378 PetscFunctionBegin; 1379 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1380 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1381 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1382 if (iascii) { 1383 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1384 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1385 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1386 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1387 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1388 for (i=0; i<(PetscInt)size; i++) { 1389 nmax = PetscMax(nmax,nz[i]); 1390 nmin = PetscMin(nmin,nz[i]); 1391 navg += nz[i]; 1392 } 1393 ierr = PetscFree(nz);CHKERRQ(ierr); 1394 navg = navg/size; 1395 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1396 PetscFunctionReturn(0); 1397 } 1398 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1399 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1400 MatInfo info; 1401 
PetscBool inodes; 1402 1403 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1404 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1405 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1406 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1407 if (!inodes) { 1408 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1409 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1410 } else { 1411 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1412 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1413 } 1414 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1415 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1416 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1418 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1420 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1421 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1422 PetscFunctionReturn(0); 1423 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1424 PetscInt inodecount,inodelimit,*inodes; 1425 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1426 if (inodes) { 1427 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1428 } else { 1429 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1430 } 1431 PetscFunctionReturn(0); 1432 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1433 PetscFunctionReturn(0); 1434 } 1435 } else if (isbinary) { 1436 if (size == 1) { 1437 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1438 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1439 } else { 1440 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1441 } 1442 PetscFunctionReturn(0); 1443 } else if (isdraw) { 1444 PetscDraw draw; 1445 PetscBool isnull; 1446 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1447 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1448 if (isnull) PetscFunctionReturn(0); 1449 } 1450 1451 { 1452 /* assemble the entire matrix onto first processor. 
*/ 1453 Mat A; 1454 Mat_SeqAIJ *Aloc; 1455 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1456 MatScalar *a; 1457 1458 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1459 if (!rank) { 1460 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1461 } else { 1462 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1463 } 1464 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1465 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1466 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1467 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1468 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1469 1470 /* copy over the A part */ 1471 Aloc = (Mat_SeqAIJ*)aij->A->data; 1472 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1473 row = mat->rmap->rstart; 1474 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1475 for (i=0; i<m; i++) { 1476 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1477 row++; 1478 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1479 } 1480 aj = Aloc->j; 1481 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1482 1483 /* copy over the B part */ 1484 Aloc = (Mat_SeqAIJ*)aij->B->data; 1485 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1486 row = mat->rmap->rstart; 1487 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1488 ct = cols; 1489 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1490 for (i=0; i<m; i++) { 1491 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1492 row++; 1493 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1494 } 1495 ierr = PetscFree(ct);CHKERRQ(ierr); 1496 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1497 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1498 /* 1499 Everyone has to call to draw the matrix since the graphics waits are 1500 synchronized across all processors that share the PetscDraw object 1501 */ 1502 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1503 if (!rank) { 1504 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1505 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1506 } 1507 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1508 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1509 ierr = MatDestroy(&A);CHKERRQ(ierr); 1510 } 1511 PetscFunctionReturn(0); 1512 } 1513 1514 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1515 { 1516 PetscErrorCode ierr; 1517 PetscBool iascii,isdraw,issocket,isbinary; 1518 1519 PetscFunctionBegin; 1520 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1521 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1522 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1523 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1524 if (iascii || isdraw || isbinary || issocket) { 1525 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1526 } 1527 PetscFunctionReturn(0); 1528 } 1529 1530 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1531 { 1532 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1533 PetscErrorCode ierr; 1534 Vec bb1 = 0; 1535 PetscBool hasop; 1536 
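  /*
     Only the "local" SOR variants are implemented below: each process sweeps its diagonal block
     mat->A, and the coupling to off-process unknowns is folded into the right-hand side via
     bb1 = bb - B*x (plus the Eisenstat variant); true parallel SOR is not supported and errors out.
     A sketch of how this code is typically reached, through the SOR preconditioner (option names
     taken from the PCSOR documentation, shown only as an example):

       -ksp_type richardson -pc_type sor -pc_sor_local_symmetric

     or, in code, PCSetType(pc,PCSOR); PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);
  */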
1537 PetscFunctionBegin; 1538 if (flag == SOR_APPLY_UPPER) { 1539 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1544 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1545 } 1546 1547 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1548 if (flag & SOR_ZERO_INITIAL_GUESS) { 1549 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1550 its--; 1551 } 1552 1553 while (its--) { 1554 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1555 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1556 1557 /* update rhs: bb1 = bb - B*x */ 1558 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1559 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1560 1561 /* local sweep */ 1562 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1563 } 1564 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1565 if (flag & SOR_ZERO_INITIAL_GUESS) { 1566 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1567 its--; 1568 } 1569 while (its--) { 1570 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1571 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1572 1573 /* update rhs: bb1 = bb - B*x */ 1574 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1575 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1576 1577 /* local sweep */ 1578 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1579 } 1580 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1581 if (flag & SOR_ZERO_INITIAL_GUESS) { 1582 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1583 its--; 1584 } 1585 while (its--) { 1586 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1587 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1588 1589 /* update rhs: bb1 = bb - B*x */ 1590 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1591 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1592 1593 /* local sweep */ 1594 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1595 } 1596 } else if (flag & SOR_EISENSTAT) { 1597 Vec xx1; 1598 1599 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1600 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1601 1602 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1603 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1604 if (!mat->diag) { 1605 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1606 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1607 } 1608 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1609 if (hasop) { 1610 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1611 } else { 1612 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1613 } 1614 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1615 1616 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1617 1618 /* local sweep */ 1619 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1620 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1621 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1622 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1623 1624 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1625 1626 matin->factorerrortype = mat->A->factorerrortype; 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1631 { 1632 Mat aA,aB,Aperm; 1633 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1634 PetscScalar *aa,*ba; 1635 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1636 PetscSF rowsf,sf; 1637 IS parcolp = NULL; 1638 PetscBool done; 1639 PetscErrorCode ierr; 1640 1641 PetscFunctionBegin; 1642 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1643 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1644 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1645 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1646 1647 /* Invert row permutation to find out where my rows should go */ 1648 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1649 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1650 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1651 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1652 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1653 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1654 1655 /* Invert column permutation to find out where my columns should go */ 1656 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1657 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1658 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1659 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1660 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1661 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1662 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1663 1664 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1665 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1666 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1667 1668 /* Find out where my gcols should go */ 1669 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1670 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1671 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1672 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1673 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1674 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1675 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1676 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1677 1678 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1679 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1680 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1681 for (i=0; i<m; i++) { 1682 PetscInt row = rdest[i],rowner; 1683 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1684 for (j=ai[i]; j<ai[i+1]; j++) { 1685 PetscInt cowner,col = cdest[aj[j]]; 1686 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to 
eliminate this search */ 1687 if (rowner == cowner) dnnz[i]++; 1688 else onnz[i]++; 1689 } 1690 for (j=bi[i]; j<bi[i+1]; j++) { 1691 PetscInt cowner,col = gcdest[bj[j]]; 1692 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1693 if (rowner == cowner) dnnz[i]++; 1694 else onnz[i]++; 1695 } 1696 } 1697 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1698 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1699 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1700 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1701 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1702 1703 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1704 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1705 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1706 for (i=0; i<m; i++) { 1707 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1708 PetscInt j0,rowlen; 1709 rowlen = ai[i+1] - ai[i]; 1710 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1711 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1712 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1713 } 1714 rowlen = bi[i+1] - bi[i]; 1715 for (j0=j=0; j<rowlen; j0=j) { 1716 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1717 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1718 } 1719 } 1720 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1721 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1722 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1723 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1724 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1725 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1726 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1727 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1728 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1729 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1730 *B = Aperm; 1731 PetscFunctionReturn(0); 1732 } 1733 1734 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1735 { 1736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1737 PetscErrorCode ierr; 1738 1739 PetscFunctionBegin; 1740 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1741 if (ghosts) *ghosts = aij->garray; 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1746 { 1747 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1748 Mat A = mat->A,B = mat->B; 1749 PetscErrorCode ierr; 1750 PetscReal isend[5],irecv[5]; 1751 1752 PetscFunctionBegin; 1753 info->block_size = 1.0; 1754 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1755 1756 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1757 isend[3] = info->memory; isend[4] = info->mallocs; 1758 1759 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1760 1761 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1762 isend[3] += info->memory; isend[4] += info->mallocs; 1763 if (flag == MAT_LOCAL) { 1764 info->nz_used = isend[0]; 1765 info->nz_allocated = isend[1]; 1766 info->nz_unneeded = isend[2]; 1767 info->memory = isend[3]; 1768 info->mallocs = 
isend[4]; 1769 } else if (flag == MAT_GLOBAL_MAX) { 1770 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1771 1772 info->nz_used = irecv[0]; 1773 info->nz_allocated = irecv[1]; 1774 info->nz_unneeded = irecv[2]; 1775 info->memory = irecv[3]; 1776 info->mallocs = irecv[4]; 1777 } else if (flag == MAT_GLOBAL_SUM) { 1778 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1779 1780 info->nz_used = irecv[0]; 1781 info->nz_allocated = irecv[1]; 1782 info->nz_unneeded = irecv[2]; 1783 info->memory = irecv[3]; 1784 info->mallocs = irecv[4]; 1785 } 1786 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1787 info->fill_ratio_needed = 0; 1788 info->factor_mallocs = 0; 1789 PetscFunctionReturn(0); 1790 } 1791 1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1793 { 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1795 PetscErrorCode ierr; 1796 1797 PetscFunctionBegin; 1798 switch (op) { 1799 case MAT_NEW_NONZERO_LOCATIONS: 1800 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1801 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1802 case MAT_KEEP_NONZERO_PATTERN: 1803 case MAT_NEW_NONZERO_LOCATION_ERR: 1804 case MAT_USE_INODES: 1805 case MAT_IGNORE_ZERO_ENTRIES: 1806 MatCheckPreallocated(A,1); 1807 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1808 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1809 break; 1810 case MAT_ROW_ORIENTED: 1811 MatCheckPreallocated(A,1); 1812 a->roworiented = flg; 1813 1814 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1815 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1816 break; 1817 case MAT_NEW_DIAGONALS: 1818 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1819 break; 1820 case MAT_IGNORE_OFF_PROC_ENTRIES: 1821 a->donotstash = flg; 1822 break; 1823 case MAT_SPD: 1824 A->spd_set = PETSC_TRUE; 1825 A->spd = flg; 1826 if (flg) { 1827 A->symmetric = PETSC_TRUE; 1828 A->structurally_symmetric = PETSC_TRUE; 1829 A->symmetric_set = PETSC_TRUE; 1830 A->structurally_symmetric_set = PETSC_TRUE; 1831 } 1832 break; 1833 case MAT_SYMMETRIC: 1834 MatCheckPreallocated(A,1); 1835 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1836 break; 1837 case MAT_STRUCTURALLY_SYMMETRIC: 1838 MatCheckPreallocated(A,1); 1839 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1840 break; 1841 case MAT_HERMITIAN: 1842 MatCheckPreallocated(A,1); 1843 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1844 break; 1845 case MAT_SYMMETRY_ETERNAL: 1846 MatCheckPreallocated(A,1); 1847 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1848 break; 1849 case MAT_SUBMAT_SINGLEIS: 1850 A->submat_singleis = flg; 1851 break; 1852 case MAT_STRUCTURE_ONLY: 1853 /* The option is handled directly by MatSetOption() */ 1854 break; 1855 default: 1856 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1857 } 1858 PetscFunctionReturn(0); 1859 } 1860 1861 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1862 { 1863 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1864 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1865 PetscErrorCode ierr; 1866 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1867 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1868 PetscInt *cmap,*idx_p; 1869 1870 PetscFunctionBegin; 1871 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1872 mat->getrowactive = PETSC_TRUE; 1873 1874 if (!mat->rowvalues && 
(idx || v)) { 1875 /* 1876 allocate enough space to hold information from the longest row. 1877 */ 1878 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1879 PetscInt max = 1,tmp; 1880 for (i=0; i<matin->rmap->n; i++) { 1881 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1882 if (max < tmp) max = tmp; 1883 } 1884 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1885 } 1886 1887 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1888 lrow = row - rstart; 1889 1890 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1891 if (!v) {pvA = 0; pvB = 0;} 1892 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1893 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1894 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1895 nztot = nzA + nzB; 1896 1897 cmap = mat->garray; 1898 if (v || idx) { 1899 if (nztot) { 1900 /* Sort by increasing column numbers, assuming A and B already sorted */ 1901 PetscInt imark = -1; 1902 if (v) { 1903 *v = v_p = mat->rowvalues; 1904 for (i=0; i<nzB; i++) { 1905 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1906 else break; 1907 } 1908 imark = i; 1909 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1910 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1911 } 1912 if (idx) { 1913 *idx = idx_p = mat->rowindices; 1914 if (imark > -1) { 1915 for (i=0; i<imark; i++) { 1916 idx_p[i] = cmap[cworkB[i]]; 1917 } 1918 } else { 1919 for (i=0; i<nzB; i++) { 1920 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1921 else break; 1922 } 1923 imark = i; 1924 } 1925 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1926 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1927 } 1928 } else { 1929 if (idx) *idx = 0; 1930 if (v) *v = 0; 1931 } 1932 } 1933 *nz = nztot; 1934 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1935 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1936 PetscFunctionReturn(0); 1937 } 1938 1939 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1940 { 1941 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1942 1943 PetscFunctionBegin; 1944 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1945 aij->getrowactive = PETSC_FALSE; 1946 PetscFunctionReturn(0); 1947 } 1948 1949 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1950 { 1951 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1952 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1953 PetscErrorCode ierr; 1954 PetscInt i,j,cstart = mat->cmap->rstart; 1955 PetscReal sum = 0.0; 1956 MatScalar *v; 1957 1958 PetscFunctionBegin; 1959 if (aij->size == 1) { 1960 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1961 } else { 1962 if (type == NORM_FROBENIUS) { 1963 v = amat->a; 1964 for (i=0; i<amat->nz; i++) { 1965 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1966 } 1967 v = bmat->a; 1968 for (i=0; i<bmat->nz; i++) { 1969 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1970 } 1971 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1972 *norm = PetscSqrtReal(*norm); 1973 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1974 } else if (type == NORM_1) { /* max column norm */ 1975 PetscReal *tmp,*tmp2; 1976 PetscInt *jj,*garray = aij->garray; 1977 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1978 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1979 *norm = 0.0; 1980 v = amat->a; jj = amat->j; 1981 for (j=0; j<amat->nz; j++) { 1982 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1983 } 1984 v = bmat->a; jj = bmat->j; 1985 for (j=0; j<bmat->nz; j++) { 1986 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1987 } 1988 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1989 for (j=0; j<mat->cmap->N; j++) { 1990 if (tmp2[j] > *norm) *norm = tmp2[j]; 1991 } 1992 ierr = PetscFree(tmp);CHKERRQ(ierr); 1993 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1994 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1995 } else if (type == NORM_INFINITY) { /* max row norm */ 1996 PetscReal ntemp = 0.0; 1997 for (j=0; j<aij->A->rmap->n; j++) { 1998 v = amat->a + amat->i[j]; 1999 sum = 0.0; 2000 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2001 sum += PetscAbsScalar(*v); v++; 2002 } 2003 v = bmat->a + bmat->i[j]; 2004 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2005 sum += PetscAbsScalar(*v); v++; 2006 } 2007 if (sum > ntemp) ntemp = sum; 2008 } 2009 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2010 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2011 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2012 } 2013 PetscFunctionReturn(0); 2014 } 2015 2016 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2017 { 2018 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2019 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2020 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol; 2021 PetscErrorCode ierr; 2022 Mat B,A_diag,*B_diag; 2023 MatScalar *array; 2024 2025 PetscFunctionBegin; 2026 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2027 ai = Aloc->i; aj = Aloc->j; 2028 bi = Bloc->i; bj = Bloc->j; 2029 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2030 PetscInt *d_nnz,*g_nnz,*o_nnz; 2031 PetscSFNode *oloc; 2032 PETSC_UNUSED PetscSF sf; 2033 2034 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2035 /* compute d_nnz for preallocation */ 2036 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2037 for (i=0; i<ai[ma]; i++) { 2038 d_nnz[aj[i]]++; 2039 } 2040 /* compute local off-diagonal contributions */ 2041 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2042 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2043 /* map those to global */ 2044 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2045 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2046 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2047 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2048 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2049 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2050 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2051 2052 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2053 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2054 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2055 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2056 ierr = 
MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2057 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2058 } else { 2059 B = *matout; 2060 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2061 } 2062 2063 b = (Mat_MPIAIJ*)B->data; 2064 A_diag = a->A; 2065 B_diag = &b->A; 2066 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2067 A_diag_ncol = A_diag->cmap->N; 2068 B_diag_ilen = sub_B_diag->ilen; 2069 B_diag_i = sub_B_diag->i; 2070 2071 /* Set ilen for diagonal of B */ 2072 for (i=0; i<A_diag_ncol; i++) { 2073 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2074 } 2075 2076 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2077 very quickly (=without using MatSetValues), because all writes are local. */ 2078 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2079 2080 /* copy over the B part */ 2081 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2082 array = Bloc->a; 2083 row = A->rmap->rstart; 2084 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2085 cols_tmp = cols; 2086 for (i=0; i<mb; i++) { 2087 ncol = bi[i+1]-bi[i]; 2088 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2089 row++; 2090 array += ncol; cols_tmp += ncol; 2091 } 2092 ierr = PetscFree(cols);CHKERRQ(ierr); 2093 2094 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2095 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2096 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2097 *matout = B; 2098 } else { 2099 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2100 } 2101 PetscFunctionReturn(0); 2102 } 2103 2104 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2105 { 2106 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2107 Mat a = aij->A,b = aij->B; 2108 PetscErrorCode ierr; 2109 PetscInt s1,s2,s3; 2110 2111 PetscFunctionBegin; 2112 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2113 if (rr) { 2114 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2115 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2116 /* Overlap communication with computation. 
*/ 2117 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2118 } 2119 if (ll) { 2120 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2121 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2122 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2123 } 2124 /* scale the diagonal block */ 2125 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2126 2127 if (rr) { 2128 /* Do a scatter end and then right scale the off-diagonal block */ 2129 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2130 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2131 } 2132 PetscFunctionReturn(0); 2133 } 2134 2135 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2136 { 2137 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2138 PetscErrorCode ierr; 2139 2140 PetscFunctionBegin; 2141 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2146 { 2147 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2148 Mat a,b,c,d; 2149 PetscBool flg; 2150 PetscErrorCode ierr; 2151 2152 PetscFunctionBegin; 2153 a = matA->A; b = matA->B; 2154 c = matB->A; d = matB->B; 2155 2156 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2157 if (flg) { 2158 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2159 } 2160 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2161 PetscFunctionReturn(0); 2162 } 2163 2164 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2165 { 2166 PetscErrorCode ierr; 2167 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2168 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2169 2170 PetscFunctionBegin; 2171 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2172 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2173 /* because of the column compression in the off-processor part of the matrix a->B, 2174 the number of columns in a->B and b->B may be different, hence we cannot call 2175 the MatCopy() directly on the two parts. If need be, we can provide a more 2176 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2177 then copying the submatrices */ 2178 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2179 } else { 2180 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2181 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2182 } 2183 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2184 PetscFunctionReturn(0); 2185 } 2186 2187 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2188 { 2189 PetscErrorCode ierr; 2190 2191 PetscFunctionBegin; 2192 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2193 PetscFunctionReturn(0); 2194 } 2195 2196 /* 2197 Computes the number of nonzeros per row needed for preallocation when X and Y 2198 have different nonzero structure. 
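    For each row the count is the size of the union of the two (sorted, globally numbered) column
    sets, computed by a two-pointer merge: for example, if row i of X has global columns {0,3,7}
    and row i of Y has {3,5}, the union {0,3,5,7} gives nnz[i] = 4.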
2199 */ 2200 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2201 { 2202 PetscInt i,j,k,nzx,nzy; 2203 2204 PetscFunctionBegin; 2205 /* Set the number of nonzeros in the new matrix */ 2206 for (i=0; i<m; i++) { 2207 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2208 nzx = xi[i+1] - xi[i]; 2209 nzy = yi[i+1] - yi[i]; 2210 nnz[i] = 0; 2211 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2212 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2213 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2214 nnz[i]++; 2215 } 2216 for (; k<nzy; k++) nnz[i]++; 2217 } 2218 PetscFunctionReturn(0); 2219 } 2220 2221 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2222 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2223 { 2224 PetscErrorCode ierr; 2225 PetscInt m = Y->rmap->N; 2226 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2227 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2228 2229 PetscFunctionBegin; 2230 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2231 PetscFunctionReturn(0); 2232 } 2233 2234 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2235 { 2236 PetscErrorCode ierr; 2237 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2238 PetscBLASInt bnz,one=1; 2239 Mat_SeqAIJ *x,*y; 2240 2241 PetscFunctionBegin; 2242 if (str == SAME_NONZERO_PATTERN) { 2243 PetscScalar alpha = a; 2244 x = (Mat_SeqAIJ*)xx->A->data; 2245 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2246 y = (Mat_SeqAIJ*)yy->A->data; 2247 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2248 x = (Mat_SeqAIJ*)xx->B->data; 2249 y = (Mat_SeqAIJ*)yy->B->data; 2250 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2251 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2252 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2253 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2254 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2255 } else { 2256 Mat B; 2257 PetscInt *nnz_d,*nnz_o; 2258 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2259 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2260 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2261 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2262 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2263 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2264 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2265 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2266 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2267 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2268 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2269 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2270 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2271 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2272 } 2273 PetscFunctionReturn(0); 2274 } 2275 2276 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2277 2278 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2279 { 2280 #if defined(PETSC_USE_COMPLEX) 2281 PetscErrorCode ierr; 2282 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2283 2284 PetscFunctionBegin; 2285 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2286 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2287 #else 2288 PetscFunctionBegin; 2289 #endif 2290 PetscFunctionReturn(0); 2291 } 2292 2293 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2294 { 2295 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2296 PetscErrorCode ierr; 2297 2298 PetscFunctionBegin; 2299 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2300 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2305 { 2306 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2307 PetscErrorCode ierr; 2308 2309 PetscFunctionBegin; 2310 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2311 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2312 PetscFunctionReturn(0); 2313 } 2314 2315 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2316 { 2317 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2318 PetscErrorCode ierr; 2319 PetscInt i,*idxb = 0; 2320 PetscScalar *va,*vb; 2321 Vec vtmp; 2322 2323 PetscFunctionBegin; 2324 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2325 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2326 if (idx) { 2327 for (i=0; i<A->rmap->n; i++) { 2328 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2329 } 2330 } 2331 2332 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2333 if (idx) { 2334 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2335 } 2336 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2337 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2338 2339 for (i=0; i<A->rmap->n; i++) { 2340 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2341 va[i] = vb[i]; 2342 if (idx) idx[i] = a->garray[idxb[i]]; 2343 } 2344 } 2345 2346 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2347 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2348 ierr = PetscFree(idxb);CHKERRQ(ierr); 2349 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2350 PetscFunctionReturn(0); 2351 } 2352 2353 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2354 { 2355 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2356 PetscErrorCode ierr; 2357 PetscInt i,*idxb = 0; 2358 PetscScalar *va,*vb; 2359 Vec vtmp; 2360 2361 PetscFunctionBegin; 2362 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2363 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2364 if (idx) { 2365 for (i=0; i<A->cmap->n; i++) { 2366 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2367 } 2368 } 2369 2370 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2371 if (idx) { 2372 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2373 } 2374 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2375 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2376 2377 for (i=0; i<A->rmap->n; i++) { 2378 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2379 va[i] = vb[i]; 2380 if (idx) idx[i] = a->garray[idxb[i]]; 2381 } 2382 } 2383 2384 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2385 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2386 ierr = PetscFree(idxb);CHKERRQ(ierr); 2387 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2388 PetscFunctionReturn(0); 2389 } 2390 2391 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2392 { 2393 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2394 PetscInt n = A->rmap->n; 2395 PetscInt cstart = A->cmap->rstart; 2396 PetscInt *cmap = mat->garray; 2397 PetscInt *diagIdx, *offdiagIdx; 2398 Vec diagV, offdiagV; 2399 PetscScalar *a, *diagA, *offdiagA; 2400 PetscInt r; 2401 PetscErrorCode ierr; 2402 2403 PetscFunctionBegin; 2404 
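  /*
     Take the row-wise minima of the diagonal block mat->A and the off-diagonal block mat->B
     separately, then merge them entry by entry (the diagonal entry wins when its magnitude is not
     larger); off-diagonal column indices are mapped to global numbering through mat->garray.
     A minimal usage sketch (v has the row layout of A and idx has A->rmap->n entries):

       ierr = MatGetRowMin(A,v,idx);CHKERRQ(ierr);
  */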
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2405 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2406 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2407 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2408 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2409 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2410 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2411 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2412 for (r = 0; r < n; ++r) { 2413 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2414 a[r] = diagA[r]; 2415 idx[r] = cstart + diagIdx[r]; 2416 } else { 2417 a[r] = offdiagA[r]; 2418 idx[r] = cmap[offdiagIdx[r]]; 2419 } 2420 } 2421 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2422 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2423 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2424 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2425 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2426 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2431 { 2432 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2433 PetscInt n = A->rmap->n; 2434 PetscInt cstart = A->cmap->rstart; 2435 PetscInt *cmap = mat->garray; 2436 PetscInt *diagIdx, *offdiagIdx; 2437 Vec diagV, offdiagV; 2438 PetscScalar *a, *diagA, *offdiagA; 2439 PetscInt r; 2440 PetscErrorCode ierr; 2441 2442 PetscFunctionBegin; 2443 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2444 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2445 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2446 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2447 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2448 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2449 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2450 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2451 for (r = 0; r < n; ++r) { 2452 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2453 a[r] = diagA[r]; 2454 idx[r] = cstart + diagIdx[r]; 2455 } else { 2456 a[r] = offdiagA[r]; 2457 idx[r] = cmap[offdiagIdx[r]]; 2458 } 2459 } 2460 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2461 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2462 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2463 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2464 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2465 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2466 PetscFunctionReturn(0); 2467 } 2468 2469 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2470 { 2471 PetscErrorCode ierr; 2472 Mat *dummy; 2473 2474 PetscFunctionBegin; 2475 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2476 *newmat = *dummy; 2477 ierr = PetscFree(dummy);CHKERRQ(ierr); 2478 PetscFunctionReturn(0); 2479 } 2480 2481 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2482 { 2483 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2484 PetscErrorCode ierr; 2485 2486 PetscFunctionBegin; 2487 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2488 A->factorerrortype = a->A->factorerrortype; 2489 PetscFunctionReturn(0); 2490 } 2491 2492 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2493 { 2494 PetscErrorCode ierr; 2495 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2496 2497 PetscFunctionBegin; 2498 
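  /*
     Fill both local blocks (aij->A and aij->B) with random values and reassemble the parallel
     matrix.  A minimal usage sketch; rctx may also be passed as NULL, in which case MatSetRandom()
     creates a default random context internally:

       PetscRandom rctx;
       ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr);
       ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
       ierr = MatSetRandom(A,rctx);CHKERRQ(ierr);
       ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
  */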
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2499 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2500 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2501 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2502 PetscFunctionReturn(0); 2503 } 2504 2505 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2506 { 2507 PetscFunctionBegin; 2508 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2509 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2510 PetscFunctionReturn(0); 2511 } 2512 2513 /*@ 2514 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2515 2516 Collective on Mat 2517 2518 Input Parameters: 2519 + A - the matrix 2520 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2521 2522 Level: advanced 2523 2524 @*/ 2525 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2526 { 2527 PetscErrorCode ierr; 2528 2529 PetscFunctionBegin; 2530 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2535 { 2536 PetscErrorCode ierr; 2537 PetscBool sc = PETSC_FALSE,flg; 2538 2539 PetscFunctionBegin; 2540 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2541 ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr); 2542 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2543 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2544 if (flg) { 2545 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2546 } 2547 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2548 PetscFunctionReturn(0); 2549 } 2550 2551 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2552 { 2553 PetscErrorCode ierr; 2554 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2555 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2556 2557 PetscFunctionBegin; 2558 if (!Y->preallocated) { 2559 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2560 } else if (!aij->nz) { 2561 PetscInt nonew = aij->nonew; 2562 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2563 aij->nonew = nonew; 2564 } 2565 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2566 PetscFunctionReturn(0); 2567 } 2568 2569 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2570 { 2571 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2572 PetscErrorCode ierr; 2573 2574 PetscFunctionBegin; 2575 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2576 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2577 if (d) { 2578 PetscInt rstart; 2579 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2580 *d += rstart; 2581 2582 } 2583 PetscFunctionReturn(0); 2584 } 2585 2586 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2587 { 2588 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2589 PetscErrorCode ierr; 2590 2591 PetscFunctionBegin; 2592 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2593 PetscFunctionReturn(0); 2594 } 2595 2596 /* -------------------------------------------------------------------*/ 2597 static struct _MatOps 
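/*
   Function table for MATMPIAIJ.  Each slot corresponds to an entry of the MatOperation enum (the
   numeric comments in the initializer are those indices, e.g. MATOP_MULT is slot 3); a 0 entry
   means there is no MPIAIJ-specific implementation of that operation.  A small sketch of querying
   the table at run time through the public API:

     PetscBool has;
     ierr = MatHasOperation(A,MATOP_GET_ROW_MAX,&has);CHKERRQ(ierr);
*/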
MatOps_Values = {MatSetValues_MPIAIJ, 2598 MatGetRow_MPIAIJ, 2599 MatRestoreRow_MPIAIJ, 2600 MatMult_MPIAIJ, 2601 /* 4*/ MatMultAdd_MPIAIJ, 2602 MatMultTranspose_MPIAIJ, 2603 MatMultTransposeAdd_MPIAIJ, 2604 0, 2605 0, 2606 0, 2607 /*10*/ 0, 2608 0, 2609 0, 2610 MatSOR_MPIAIJ, 2611 MatTranspose_MPIAIJ, 2612 /*15*/ MatGetInfo_MPIAIJ, 2613 MatEqual_MPIAIJ, 2614 MatGetDiagonal_MPIAIJ, 2615 MatDiagonalScale_MPIAIJ, 2616 MatNorm_MPIAIJ, 2617 /*20*/ MatAssemblyBegin_MPIAIJ, 2618 MatAssemblyEnd_MPIAIJ, 2619 MatSetOption_MPIAIJ, 2620 MatZeroEntries_MPIAIJ, 2621 /*24*/ MatZeroRows_MPIAIJ, 2622 0, 2623 0, 2624 0, 2625 0, 2626 /*29*/ MatSetUp_MPIAIJ, 2627 0, 2628 0, 2629 MatGetDiagonalBlock_MPIAIJ, 2630 0, 2631 /*34*/ MatDuplicate_MPIAIJ, 2632 0, 2633 0, 2634 0, 2635 0, 2636 /*39*/ MatAXPY_MPIAIJ, 2637 MatCreateSubMatrices_MPIAIJ, 2638 MatIncreaseOverlap_MPIAIJ, 2639 MatGetValues_MPIAIJ, 2640 MatCopy_MPIAIJ, 2641 /*44*/ MatGetRowMax_MPIAIJ, 2642 MatScale_MPIAIJ, 2643 MatShift_MPIAIJ, 2644 MatDiagonalSet_MPIAIJ, 2645 MatZeroRowsColumns_MPIAIJ, 2646 /*49*/ MatSetRandom_MPIAIJ, 2647 0, 2648 0, 2649 0, 2650 0, 2651 /*54*/ MatFDColoringCreate_MPIXAIJ, 2652 0, 2653 MatSetUnfactored_MPIAIJ, 2654 MatPermute_MPIAIJ, 2655 0, 2656 /*59*/ MatCreateSubMatrix_MPIAIJ, 2657 MatDestroy_MPIAIJ, 2658 MatView_MPIAIJ, 2659 0, 2660 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2661 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2662 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2663 0, 2664 0, 2665 0, 2666 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2667 MatGetRowMinAbs_MPIAIJ, 2668 0, 2669 0, 2670 0, 2671 0, 2672 /*75*/ MatFDColoringApply_AIJ, 2673 MatSetFromOptions_MPIAIJ, 2674 0, 2675 0, 2676 MatFindZeroDiagonals_MPIAIJ, 2677 /*80*/ 0, 2678 0, 2679 0, 2680 /*83*/ MatLoad_MPIAIJ, 2681 MatIsSymmetric_MPIAIJ, 2682 0, 2683 0, 2684 0, 2685 0, 2686 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2687 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2688 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2689 MatPtAP_MPIAIJ_MPIAIJ, 2690 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2691 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2692 0, 2693 0, 2694 0, 2695 0, 2696 /*99*/ 0, 2697 0, 2698 0, 2699 MatConjugate_MPIAIJ, 2700 0, 2701 /*104*/MatSetValuesRow_MPIAIJ, 2702 MatRealPart_MPIAIJ, 2703 MatImaginaryPart_MPIAIJ, 2704 0, 2705 0, 2706 /*109*/0, 2707 0, 2708 MatGetRowMin_MPIAIJ, 2709 0, 2710 MatMissingDiagonal_MPIAIJ, 2711 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2712 0, 2713 MatGetGhosts_MPIAIJ, 2714 0, 2715 0, 2716 /*119*/0, 2717 0, 2718 0, 2719 0, 2720 MatGetMultiProcBlock_MPIAIJ, 2721 /*124*/MatFindNonzeroRows_MPIAIJ, 2722 MatGetColumnNorms_MPIAIJ, 2723 MatInvertBlockDiagonal_MPIAIJ, 2724 MatInvertVariableBlockDiagonal_MPIAIJ, 2725 MatCreateSubMatricesMPI_MPIAIJ, 2726 /*129*/0, 2727 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2728 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2729 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2730 0, 2731 /*134*/0, 2732 0, 2733 MatRARt_MPIAIJ_MPIAIJ, 2734 0, 2735 0, 2736 /*139*/MatSetBlockSizes_MPIAIJ, 2737 0, 2738 0, 2739 MatFDColoringSetUp_MPIXAIJ, 2740 MatFindOffBlockDiagonalEntries_MPIAIJ, 2741 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2742 }; 2743 2744 /* ----------------------------------------------------------------------------------------*/ 2745 2746 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2747 { 2748 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2749 PetscErrorCode ierr; 2750 2751 PetscFunctionBegin; 2752 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2753 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2754 PetscFunctionReturn(0); 2755 } 2756 2757 PetscErrorCode 
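/*
   MatStoreValues_MPIAIJ above and MatRetrieveValues_MPIAIJ below simply forward to the two local
   blocks.  The documented usage pattern is roughly (a sketch; the nonzero structure must be frozen
   first):

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
        ...  change numerical values, e.g. with MatZeroEntries()/MatSetValues(), and refactor  ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/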
MatRetrieveValues_MPIAIJ(Mat mat) 2758 { 2759 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2760 PetscErrorCode ierr; 2761 2762 PetscFunctionBegin; 2763 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2764 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2765 PetscFunctionReturn(0); 2766 } 2767 2768 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2769 { 2770 Mat_MPIAIJ *b; 2771 PetscErrorCode ierr; 2772 2773 PetscFunctionBegin; 2774 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2775 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2776 b = (Mat_MPIAIJ*)B->data; 2777 2778 #if defined(PETSC_USE_CTABLE) 2779 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2780 #else 2781 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2782 #endif 2783 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2784 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2785 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2786 2787 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2788 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2789 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2790 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2791 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2792 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2793 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2794 2795 if (!B->preallocated) { 2796 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2797 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2798 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2799 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2800 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2801 } 2802 2803 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2804 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2805 B->preallocated = PETSC_TRUE; 2806 B->was_assembled = PETSC_FALSE; 2807 B->assembled = PETSC_FALSE;; 2808 PetscFunctionReturn(0); 2809 } 2810 2811 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2812 { 2813 Mat_MPIAIJ *b; 2814 PetscErrorCode ierr; 2815 2816 PetscFunctionBegin; 2817 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2818 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2819 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2820 b = (Mat_MPIAIJ*)B->data; 2821 2822 #if defined(PETSC_USE_CTABLE) 2823 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2824 #else 2825 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2826 #endif 2827 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2828 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2829 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2830 2831 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2832 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2833 B->preallocated = PETSC_TRUE; 2834 B->was_assembled = PETSC_FALSE; 2835 B->assembled = PETSC_FALSE; 2836 PetscFunctionReturn(0); 2837 } 2838 2839 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2840 { 2841 Mat mat; 2842 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2843 PetscErrorCode ierr; 2844 2845 PetscFunctionBegin; 2846 *newmat = 0; 2847 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2848 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2849 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2850 ierr = 
MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2851 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2852 a = (Mat_MPIAIJ*)mat->data; 2853 2854 mat->factortype = matin->factortype; 2855 mat->assembled = PETSC_TRUE; 2856 mat->insertmode = NOT_SET_VALUES; 2857 mat->preallocated = PETSC_TRUE; 2858 2859 a->size = oldmat->size; 2860 a->rank = oldmat->rank; 2861 a->donotstash = oldmat->donotstash; 2862 a->roworiented = oldmat->roworiented; 2863 a->rowindices = 0; 2864 a->rowvalues = 0; 2865 a->getrowactive = PETSC_FALSE; 2866 2867 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2868 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2869 2870 if (oldmat->colmap) { 2871 #if defined(PETSC_USE_CTABLE) 2872 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2873 #else 2874 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2875 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2876 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2877 #endif 2878 } else a->colmap = 0; 2879 if (oldmat->garray) { 2880 PetscInt len; 2881 len = oldmat->B->cmap->n; 2882 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2883 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2884 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2885 } else a->garray = 0; 2886 2887 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2888 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2889 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2890 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2891 2892 if (oldmat->Mvctx_mpi1) { 2893 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2894 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2895 } 2896 2897 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2898 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2899 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2900 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2901 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2902 *newmat = mat; 2903 PetscFunctionReturn(0); 2904 } 2905 2906 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2907 { 2908 PetscScalar *vals,*svals; 2909 MPI_Comm comm; 2910 PetscErrorCode ierr; 2911 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2912 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2913 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2914 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2915 PetscInt cend,cstart,n,*rowners; 2916 int fd; 2917 PetscInt bs = newMat->rmap->bs; 2918 2919 PetscFunctionBegin; 2920 /* force binary viewer to load .info file if it has not yet done so */ 2921 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2922 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2923 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2924 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2925 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2926 if (!rank) { 2927 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2928 if (header[0] != MAT_FILE_CLASSID) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2929 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2930 } 2931 2932 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2933 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2934 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2935 if (bs < 0) bs = 1; 2936 2937 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2938 M = header[1]; N = header[2]; 2939 2940 /* If global sizes are set, check if they are consistent with that given in the file */ 2941 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2942 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2943 2944 /* determine ownership of all (block) rows */ 2945 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2946 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2947 else m = newMat->rmap->n; /* Set by user */ 2948 2949 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2950 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2951 2952 /* First process needs enough room for process with most rows */ 2953 if (!rank) { 2954 mmax = rowners[1]; 2955 for (i=2; i<=size; i++) { 2956 mmax = PetscMax(mmax, rowners[i]); 2957 } 2958 } else mmax = -1; /* unused, but compilers complain */ 2959 2960 rowners[0] = 0; 2961 for (i=2; i<=size; i++) { 2962 rowners[i] += rowners[i-1]; 2963 } 2964 rstart = rowners[rank]; 2965 rend = rowners[rank+1]; 2966 2967 /* distribute row lengths to all processors */ 2968 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2969 if (!rank) { 2970 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2971 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2972 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2973 for (j=0; j<m; j++) { 2974 procsnz[0] += ourlens[j]; 2975 } 2976 for (i=1; i<size; i++) { 2977 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2978 /* calculate the number of nonzeros on each processor */ 2979 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2980 procsnz[i] += rowlengths[j]; 2981 } 2982 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2983 } 2984 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2985 } else { 2986 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2987 } 2988 2989 if (!rank) { 2990 /* determine max buffer needed and allocate it */ 2991 maxnz = 0; 2992 for (i=0; i<size; i++) { 2993 maxnz = PetscMax(maxnz,procsnz[i]); 2994 } 2995 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2996 2997 /* read in my part of the matrix column indices */ 2998 nz = procsnz[0]; 2999 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3000 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3001 3002 /* read in every one elses and ship off */ 3003 for (i=1; i<size; i++) { 3004 nz = procsnz[i]; 3005 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3006 ierr = 
MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3007 } 3008 ierr = PetscFree(cols);CHKERRQ(ierr); 3009 } else { 3010 /* determine buffer space needed for message */ 3011 nz = 0; 3012 for (i=0; i<m; i++) { 3013 nz += ourlens[i]; 3014 } 3015 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3016 3017 /* receive message of column indices*/ 3018 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3019 } 3020 3021 /* determine column ownership if matrix is not square */ 3022 if (N != M) { 3023 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3024 else n = newMat->cmap->n; 3025 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3026 cstart = cend - n; 3027 } else { 3028 cstart = rstart; 3029 cend = rend; 3030 n = cend - cstart; 3031 } 3032 3033 /* loop over local rows, determining number of off diagonal entries */ 3034 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3035 jj = 0; 3036 for (i=0; i<m; i++) { 3037 for (j=0; j<ourlens[i]; j++) { 3038 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3039 jj++; 3040 } 3041 } 3042 3043 for (i=0; i<m; i++) { 3044 ourlens[i] -= offlens[i]; 3045 } 3046 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3047 3048 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3049 3050 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3051 3052 for (i=0; i<m; i++) { 3053 ourlens[i] += offlens[i]; 3054 } 3055 3056 if (!rank) { 3057 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3058 3059 /* read in my part of the matrix numerical values */ 3060 nz = procsnz[0]; 3061 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3062 3063 /* insert into matrix */ 3064 jj = rstart; 3065 smycols = mycols; 3066 svals = vals; 3067 for (i=0; i<m; i++) { 3068 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3069 smycols += ourlens[i]; 3070 svals += ourlens[i]; 3071 jj++; 3072 } 3073 3074 /* read in other processors and ship out */ 3075 for (i=1; i<size; i++) { 3076 nz = procsnz[i]; 3077 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3078 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3079 } 3080 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3081 } else { 3082 /* receive numeric values */ 3083 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3084 3085 /* receive message of values*/ 3086 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3087 3088 /* insert into matrix */ 3089 jj = rstart; 3090 smycols = mycols; 3091 svals = vals; 3092 for (i=0; i<m; i++) { 3093 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3094 smycols += ourlens[i]; 3095 svals += ourlens[i]; 3096 jj++; 3097 } 3098 } 3099 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3100 ierr = PetscFree(vals);CHKERRQ(ierr); 3101 ierr = PetscFree(mycols);CHKERRQ(ierr); 3102 ierr = PetscFree(rowners);CHKERRQ(ierr); 3103 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3104 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3105 PetscFunctionReturn(0); 3106 } 3107 3108 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3109 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3110 { 3111 PetscErrorCode ierr; 3112 IS iscol_local; 3113 PetscBool isstride; 3114 PetscMPIInt lisstride=0,gisstride; 3115 3116 PetscFunctionBegin; 3117 /* check if we are grabbing all columns*/ 3118 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3119 3120 if (isstride) { 3121 PetscInt start,len,mstart,mlen; 3122 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3123 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3124 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3125 if (mstart == start && mlen-mstart == len) lisstride = 1; 3126 } 3127 3128 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3129 if (gisstride) { 3130 PetscInt N; 3131 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3132 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3133 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3134 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3135 } else { 3136 PetscInt cbs; 3137 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3138 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3139 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3140 } 3141 3142 *isseq = iscol_local; 3143 PetscFunctionReturn(0); 3144 } 3145 3146 /* 3147 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3148 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3149 3150 Input Parameters: 3151 mat - matrix 3152 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3153 i.e., mat->rstart <= isrow[i] < mat->rend 3154 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3155 i.e., mat->cstart <= iscol[i] < mat->cend 3156 Output Parameter: 3157 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3158 iscol_o - sequential column index set for retrieving mat->B 3159 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3160 */ 3161 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3162 { 3163 PetscErrorCode ierr; 3164 Vec x,cmap; 3165 const PetscInt *is_idx; 3166 PetscScalar *xarray,*cmaparray; 3167 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3168 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3169 Mat B=a->B; 3170 Vec lvec=a->lvec,lcmap; 3171 PetscInt i,cstart,cend,Bn=B->cmap->N; 3172 MPI_Comm comm; 3173 VecScatter Mvctx=a->Mvctx; 3174 3175 PetscFunctionBegin; 3176 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3177 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3178 3179 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3180 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3181 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3182 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3183 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3184 3185 /* Get start indices */ 3186 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3187 isstart -= ncols; 3188 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3189 3190 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3191 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3192 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3193 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3194 for (i=0; i<ncols; i++) { 3195 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3196 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3197 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3198 } 3199 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3200 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3201 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3202 3203 /* Get iscol_d */ 3204 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3205 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3206 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3207 3208 /* Get isrow_d */ 3209 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3210 rstart = mat->rmap->rstart; 3211 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3212 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3213 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3214 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3215 3216 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3217 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3218 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3219 3220 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3221 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3222 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3223 3224 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3225 3226 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3227 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3228 3229 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3230 /* off-process column indices */ 3231 count = 0; 3232 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3233 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3234 3235 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3236 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3237 for (i=0; i<Bn; i++) { 3238 if (PetscRealPart(xarray[i]) > -1.0) { 3239 idx[count] = i; /* local column index in off-diagonal part B */ 3240 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3241 count++; 3242 } 3243 } 3244 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3245 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3246 3247 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3248 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3249 3250 ierr = PetscFree(idx);CHKERRQ(ierr); 3251 *garray = cmap1; 3252 3253 ierr = VecDestroy(&x);CHKERRQ(ierr); 3254 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3255 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3256 PetscFunctionReturn(0); 3257 } 3258 3259 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3260 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3261 { 3262 PetscErrorCode ierr; 3263 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3264 Mat M = NULL; 3265 MPI_Comm comm; 3266 IS iscol_d,isrow_d,iscol_o; 3267 Mat Asub = NULL,Bsub = NULL; 3268 PetscInt n; 3269 3270 PetscFunctionBegin; 3271 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3272 3273 if (call == MAT_REUSE_MATRIX) { 3274 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3275 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3276 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3277 3278 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3279 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3280 3281 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3282 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3283 3284 /* Update diagonal and off-diagonal portions of submat */ 3285 asub = (Mat_MPIAIJ*)(*submat)->data; 3286 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3287 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3288 if (n) { 3289 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3290 } 3291 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3292 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3293 3294 } else { /* call == MAT_INITIAL_MATRIX) */ 3295 const PetscInt *garray; 3296 PetscInt BsubN; 3297 3298 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3299 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3300 3301 /* Create local submatrices Asub and Bsub */ 3302 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3303 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3304 3305 /* Create submatrix M */ 3306 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3307 3308 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3309 asub = (Mat_MPIAIJ*)M->data; 3310 3311 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3312 n = asub->B->cmap->N; 3313 if (BsubN > n) { 3314 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3315 const PetscInt *idx; 3316 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3317 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3318 3319 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3320 j = 0; 3321 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3322 for (i=0; i<n; i++) { 3323 if (j >= BsubN) break; 3324 while (subgarray[i] > garray[j]) j++; 3325 3326 if (subgarray[i] == garray[j]) { 3327 idx_new[i] = idx[j++]; 3328 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3329 } 3330 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3331 3332 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3333 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3334 3335 } else if (BsubN < n) { 3336 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3337 } 3338 3339 ierr = PetscFree(garray);CHKERRQ(ierr); 3340 *submat = M; 3341 3342 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3343 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3344 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3345 3346 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3347 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3348 3349 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3350 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3351 } 3352 PetscFunctionReturn(0); 3353 } 3354 3355 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3356 { 3357 PetscErrorCode ierr; 3358 IS iscol_local=NULL,isrow_d; 3359 PetscInt csize; 3360 PetscInt n,i,j,start,end; 3361 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3362 MPI_Comm comm; 3363 3364 PetscFunctionBegin; 3365 /* If isrow has same processor distribution as mat, 3366 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3367 if (call == MAT_REUSE_MATRIX) { 3368 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3369 if (isrow_d) { 3370 sameRowDist = PETSC_TRUE; 3371 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3372 } else { 3373 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3374 if (iscol_local) { 3375 sameRowDist = PETSC_TRUE; 3376 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3377 } 3378 } 3379 } else { 3380 /* Check if isrow has same processor distribution as mat */ 3381 sameDist[0] 
= PETSC_FALSE; 3382 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3383 if (!n) { 3384 sameDist[0] = PETSC_TRUE; 3385 } else { 3386 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3387 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3388 if (i >= start && j < end) { 3389 sameDist[0] = PETSC_TRUE; 3390 } 3391 } 3392 3393 /* Check if iscol has same processor distribution as mat */ 3394 sameDist[1] = PETSC_FALSE; 3395 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3396 if (!n) { 3397 sameDist[1] = PETSC_TRUE; 3398 } else { 3399 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3400 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3401 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3402 } 3403 3404 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3405 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3406 sameRowDist = tsameDist[0]; 3407 } 3408 3409 if (sameRowDist) { 3410 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3411 /* isrow and iscol have same processor distribution as mat */ 3412 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3413 PetscFunctionReturn(0); 3414 } else { /* sameRowDist */ 3415 /* isrow has same processor distribution as mat */ 3416 if (call == MAT_INITIAL_MATRIX) { 3417 PetscBool sorted; 3418 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3419 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3420 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3421 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3422 3423 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3424 if (sorted) { 3425 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3426 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3427 PetscFunctionReturn(0); 3428 } 3429 } else { /* call == MAT_REUSE_MATRIX */ 3430 IS iscol_sub; 3431 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3432 if (iscol_sub) { 3433 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3434 PetscFunctionReturn(0); 3435 } 3436 } 3437 } 3438 } 3439 3440 /* General case: iscol -> iscol_local which has global size of iscol */ 3441 if (call == MAT_REUSE_MATRIX) { 3442 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3443 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3444 } else { 3445 if (!iscol_local) { 3446 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3447 } 3448 } 3449 3450 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3451 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3452 3453 if (call == MAT_INITIAL_MATRIX) { 3454 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3455 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3456 } 3457 PetscFunctionReturn(0); 3458 } 3459 3460 /*@C 3461 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3462 and "off-diagonal" part of the matrix in CSR format. 3463 3464 Collective on MPI_Comm 3465 3466 Input Parameters: 3467 + comm - MPI communicator 3468 . 
A - "diagonal" portion of matrix 3469 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3470 - garray - global index of B columns 3471 3472 Output Parameter: 3473 . mat - the matrix, with input A as its local diagonal matrix 3474 Level: advanced 3475 3476 Notes: 3477 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3478 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3479 3480 .seealso: MatCreateMPIAIJWithSplitArrays() 3481 @*/ 3482 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3483 { 3484 PetscErrorCode ierr; 3485 Mat_MPIAIJ *maij; 3486 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3487 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3488 PetscScalar *oa=b->a; 3489 Mat Bnew; 3490 PetscInt m,n,N; 3491 3492 PetscFunctionBegin; 3493 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3494 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3495 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3496 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3497 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3498 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3499 3500 /* Get global columns of mat */ 3501 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3502 3503 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3504 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3505 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3506 maij = (Mat_MPIAIJ*)(*mat)->data; 3507 3508 (*mat)->preallocated = PETSC_TRUE; 3509 3510 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3511 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3512 3513 /* Set A as diagonal portion of *mat */ 3514 maij->A = A; 3515 3516 nz = oi[m]; 3517 for (i=0; i<nz; i++) { 3518 col = oj[i]; 3519 oj[i] = garray[col]; 3520 } 3521 3522 /* Set Bnew as off-diagonal portion of *mat */ 3523 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3524 bnew = (Mat_SeqAIJ*)Bnew->data; 3525 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3526 maij->B = Bnew; 3527 3528 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3529 3530 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3531 b->free_a = PETSC_FALSE; 3532 b->free_ij = PETSC_FALSE; 3533 ierr = MatDestroy(&B);CHKERRQ(ierr); 3534 3535 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3536 bnew->free_a = PETSC_TRUE; 3537 bnew->free_ij = PETSC_TRUE; 3538 3539 /* condense columns of maij->B */ 3540 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3541 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3542 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3543 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3544 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3545 PetscFunctionReturn(0); 3546 } 3547 3548 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3549 
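/*
   Illustrative sketch (not part of the library source): one way a caller might use
   MatCreateMPIAIJWithSeqAIJ() above. The local sizes, nonzero counts, and the garray
   contents below are hypothetical; in practice the diagonal block Ad, the
   column-compressed off-diagonal block Ao, and garray are produced by routines such as
   ISGetSeqIS_SameColDist_Private() and MatCreateSubMatrix_SeqAIJ() as in
   MatCreateSubMatrix_MPIAIJ_SameRowColDist() above.

     Mat            Ad,Ao,C;
     const PetscInt m = 4, n = 4;     // local rows and local (diagonal) columns, hypothetical
     PetscInt       *garray;          // global column index of each (local) column of Ao

     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,3,NULL,&Ad);CHKERRQ(ierr);
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,2,2,NULL,&Ao);CHKERRQ(ierr);
     // ... MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd() on Ad and Ao;
     //     Ao uses local column indices 0..1 for its two off-process columns ...
     ierr = PetscMalloc1(2,&garray);CHKERRQ(ierr);
     // ... fill garray[0],garray[1] with the global column indices of Ao's columns ...
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Ad,Ao,garray,&C);CHKERRQ(ierr);
     // Ad becomes the diagonal block of C; Ao is destroyed inside the call, so
     // neither sequential matrix may be used afterwards. garray is not taken over
     // by the routine and remains the caller's to free.
     ierr = PetscFree(garray);CHKERRQ(ierr);
*/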
3550 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3551 { 3552 PetscErrorCode ierr; 3553 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3554 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3555 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3556 Mat M,Msub,B=a->B; 3557 MatScalar *aa; 3558 Mat_SeqAIJ *aij; 3559 PetscInt *garray = a->garray,*colsub,Ncols; 3560 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3561 IS iscol_sub,iscmap; 3562 const PetscInt *is_idx,*cmap; 3563 PetscBool allcolumns=PETSC_FALSE; 3564 MPI_Comm comm; 3565 3566 PetscFunctionBegin; 3567 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3568 3569 if (call == MAT_REUSE_MATRIX) { 3570 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3571 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3572 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3573 3574 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3575 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3576 3577 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3578 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3579 3580 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3581 3582 } else { /* call == MAT_INITIAL_MATRIX) */ 3583 PetscBool flg; 3584 3585 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3586 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3587 3588 /* (1) iscol -> nonscalable iscol_local */ 3589 /* Check for special case: each processor gets entire matrix columns */ 3590 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3591 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3592 if (allcolumns) { 3593 iscol_sub = iscol_local; 3594 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3595 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3596 3597 } else { 3598 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3599 PetscInt *idx,*cmap1,k; 3600 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3601 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3602 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3603 count = 0; 3604 k = 0; 3605 for (i=0; i<Ncols; i++) { 3606 j = is_idx[i]; 3607 if (j >= cstart && j < cend) { 3608 /* diagonal part of mat */ 3609 idx[count] = j; 3610 cmap1[count++] = i; /* column index in submat */ 3611 } else if (Bn) { 3612 /* off-diagonal part of mat */ 3613 if (j == garray[k]) { 3614 idx[count] = j; 3615 cmap1[count++] = i; /* column index in submat */ 3616 } else if (j > garray[k]) { 3617 while (j > garray[k] && k < Bn-1) k++; 3618 if (j == garray[k]) { 3619 idx[count] = j; 3620 cmap1[count++] = i; /* column index in submat */ 3621 } 3622 } 3623 } 3624 } 3625 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3626 3627 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3628 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3629 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3630 3631 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3632 } 3633 3634 /* (3) Create sequential Msub */ 3635 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3636 } 3637 3638 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3639 aij = (Mat_SeqAIJ*)(Msub)->data; 3640 ii = aij->i; 3641 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3642 3643 /* 3644 m - number of local rows 3645 Ncols - number of columns (same on all processors) 3646 rstart - first row in new global matrix generated 3647 */ 3648 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3649 3650 if (call == MAT_INITIAL_MATRIX) { 3651 /* (4) Create parallel newmat */ 3652 PetscMPIInt rank,size; 3653 PetscInt csize; 3654 3655 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3656 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3657 3658 /* 3659 Determine the number of non-zeros in the diagonal and off-diagonal 3660 portions of the matrix in order to do correct preallocation 3661 */ 3662 3663 /* first get start and end of "diagonal" columns */ 3664 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3665 if (csize == PETSC_DECIDE) { 3666 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3667 if (mglobal == Ncols) { /* square matrix */ 3668 nlocal = m; 3669 } else { 3670 nlocal = Ncols/size + ((Ncols % size) > rank); 3671 } 3672 } else { 3673 nlocal = csize; 3674 } 3675 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3676 rstart = rend - nlocal; 3677 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3678 3679 /* next, compute all the lengths */ 3680 jj = aij->j; 3681 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3682 olens = dlens + m; 3683 for (i=0; i<m; i++) { 3684 jend = ii[i+1] - ii[i]; 3685 olen = 0; 3686 dlen = 0; 3687 for (j=0; j<jend; j++) { 3688 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3689 else dlen++; 3690 jj++; 3691 } 3692 olens[i] = olen; 3693 dlens[i] = dlen; 3694 } 3695 3696 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3697 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3698 3699 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3700 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3701 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3702 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3703 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3704 ierr = PetscFree(dlens);CHKERRQ(ierr); 3705 3706 } else { /* call == MAT_REUSE_MATRIX */ 3707 M = *newmat; 3708 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3709 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3710 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3711 /* 3712 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3713 rather than the slower MatSetValues(). 3714 */ 3715 M->was_assembled = PETSC_TRUE; 3716 M->assembled = PETSC_FALSE; 3717 } 3718 3719 /* (5) Set values of Msub to *newmat */ 3720 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3721 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3722 3723 jj = aij->j; 3724 aa = aij->a; 3725 for (i=0; i<m; i++) { 3726 row = rstart + i; 3727 nz = ii[i+1] - ii[i]; 3728 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3729 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3730 jj += nz; aa += nz; 3731 } 3732 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3733 3734 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3735 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3736 3737 ierr = PetscFree(colsub);CHKERRQ(ierr); 3738 3739 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3740 if (call == MAT_INITIAL_MATRIX) { 3741 *newmat = M; 3742 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3743 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3744 3745 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3746 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3747 3748 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3749 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3750 3751 if (iscol_local) { 3752 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3753 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3754 } 3755 } 3756 PetscFunctionReturn(0); 3757 } 3758 3759 /* 3760 Not great since it makes two copies of the submatrix, first an SeqAIJ 3761 in local and then by concatenating the local matrices the end result. 3762 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3763 3764 Note: This requires a sequential iscol with all indices. 
3765 */ 3766 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3767 { 3768 PetscErrorCode ierr; 3769 PetscMPIInt rank,size; 3770 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3771 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3772 Mat M,Mreuse; 3773 MatScalar *aa,*vwork; 3774 MPI_Comm comm; 3775 Mat_SeqAIJ *aij; 3776 PetscBool colflag,allcolumns=PETSC_FALSE; 3777 3778 PetscFunctionBegin; 3779 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3780 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3781 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3782 3783 /* Check for special case: each processor gets entire matrix columns */ 3784 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3785 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3786 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3787 3788 if (call == MAT_REUSE_MATRIX) { 3789 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3790 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3791 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3792 } else { 3793 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3794 } 3795 3796 /* 3797 m - number of local rows 3798 n - number of columns (same on all processors) 3799 rstart - first row in new global matrix generated 3800 */ 3801 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3802 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3803 if (call == MAT_INITIAL_MATRIX) { 3804 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3805 ii = aij->i; 3806 jj = aij->j; 3807 3808 /* 3809 Determine the number of non-zeros in the diagonal and off-diagonal 3810 portions of the matrix in order to do correct preallocation 3811 */ 3812 3813 /* first get start and end of "diagonal" columns */ 3814 if (csize == PETSC_DECIDE) { 3815 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3816 if (mglobal == n) { /* square matrix */ 3817 nlocal = m; 3818 } else { 3819 nlocal = n/size + ((n % size) > rank); 3820 } 3821 } else { 3822 nlocal = csize; 3823 } 3824 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3825 rstart = rend - nlocal; 3826 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3827 3828 /* next, compute all the lengths */ 3829 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3830 olens = dlens + m; 3831 for (i=0; i<m; i++) { 3832 jend = ii[i+1] - ii[i]; 3833 olen = 0; 3834 dlen = 0; 3835 for (j=0; j<jend; j++) { 3836 if (*jj < rstart || *jj >= rend) olen++; 3837 else dlen++; 3838 jj++; 3839 } 3840 olens[i] = olen; 3841 dlens[i] = dlen; 3842 } 3843 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3844 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3845 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3846 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3847 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3848 ierr = PetscFree(dlens);CHKERRQ(ierr); 3849 } else { 3850 PetscInt ml,nl; 3851 3852 M = *newmat; 3853 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3854 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3855 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3856 /* 3857 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3858 rather than the slower MatSetValues(). 3859 */ 3860 M->was_assembled = PETSC_TRUE; 3861 M->assembled = PETSC_FALSE; 3862 } 3863 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3864 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3865 ii = aij->i; 3866 jj = aij->j; 3867 aa = aij->a; 3868 for (i=0; i<m; i++) { 3869 row = rstart + i; 3870 nz = ii[i+1] - ii[i]; 3871 cwork = jj; jj += nz; 3872 vwork = aa; aa += nz; 3873 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3874 } 3875 3876 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3877 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3878 *newmat = M; 3879 3880 /* save submatrix used in processor for next request */ 3881 if (call == MAT_INITIAL_MATRIX) { 3882 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3883 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3884 } 3885 PetscFunctionReturn(0); 3886 } 3887 3888 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3889 { 3890 PetscInt m,cstart, cend,j,nnz,i,d; 3891 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3892 const PetscInt *JJ; 3893 PetscScalar *values; 3894 PetscErrorCode ierr; 3895 PetscBool nooffprocentries; 3896 3897 PetscFunctionBegin; 3898 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3899 3900 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3901 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3902 m = B->rmap->n; 3903 cstart = B->cmap->rstart; 3904 cend = B->cmap->rend; 3905 rstart = B->rmap->rstart; 3906 3907 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3908 3909 #if defined(PETSC_USE_DEBUG) 3910 for (i=0; i<m; i++) { 3911 nnz = Ii[i+1]- Ii[i]; 3912 JJ = J + Ii[i]; 3913 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3914 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3915 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3916 } 3917 #endif 3918 3919 for (i=0; i<m; i++) { 3920 nnz = Ii[i+1]- Ii[i]; 3921 JJ = J + Ii[i]; 3922 nnz_max = PetscMax(nnz_max,nnz); 3923 d = 0; 3924 for (j=0; j<nnz; j++) { 3925 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3926 } 3927 d_nnz[i] = d; 3928 o_nnz[i] = nnz - d; 3929 } 3930 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3931 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3932 3933 if (v) values = (PetscScalar*)v; 3934 else { 3935 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3936 } 3937 3938 for (i=0; i<m; i++) { 3939 ii = i + rstart; 3940 nnz = Ii[i+1]- Ii[i]; 3941 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3942 } 3943 nooffprocentries = B->nooffprocentries; 3944 B->nooffprocentries = PETSC_TRUE; 3945 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3946 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3947 B->nooffprocentries = nooffprocentries; 3948 3949 if (!v) { 3950 ierr = PetscFree(values);CHKERRQ(ierr); 3951 } 3952 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3953 PetscFunctionReturn(0); 3954 } 3955 3956 /*@ 3957 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3958 (the default parallel PETSc format). 3959 3960 Collective on MPI_Comm 3961 3962 Input Parameters: 3963 + B - the matrix 3964 . i - the indices into j for the start of each local row (starts with zero) 3965 . j - the column indices for each local row (starts with zero) 3966 - v - optional values in the matrix 3967 3968 Level: developer 3969 3970 Notes: 3971 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3972 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3973 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3974 3975 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3976 3977 The format which is used for the sparse matrix input, is equivalent to a 3978 row-major ordering.. i.e for the following matrix, the input data expected is 3979 as shown 3980 3981 $ 1 0 0 3982 $ 2 0 3 P0 3983 $ ------- 3984 $ 4 5 6 P1 3985 $ 3986 $ Process0 [P0]: rows_owned=[0,1] 3987 $ i = {0,1,3} [size = nrow+1 = 2+1] 3988 $ j = {0,0,2} [size = 3] 3989 $ v = {1,2,3} [size = 3] 3990 $ 3991 $ Process1 [P1]: rows_owned=[2] 3992 $ i = {0,3} [size = nrow+1 = 1+1] 3993 $ j = {0,1,2} [size = 3] 3994 $ v = {4,5,6} [size = 3] 3995 3996 .keywords: matrix, aij, compressed row, sparse, parallel 3997 3998 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3999 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4000 @*/ 4001 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4002 { 4003 PetscErrorCode ierr; 4004 4005 PetscFunctionBegin; 4006 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4007 PetscFunctionReturn(0); 4008 } 4009 4010 /*@C 4011 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4012 (the default parallel PETSc format). For good matrix assembly performance 4013 the user should preallocate the matrix storage by setting the parameters 4014 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4015 performance can be increased by more than a factor of 50. 4016 4017 Collective on MPI_Comm 4018 4019 Input Parameters: 4020 + B - the matrix 4021 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4022 (same value is used for all local rows) 4023 . d_nnz - array containing the number of nonzeros in the various rows of the 4024 DIAGONAL portion of the local submatrix (possibly different for each row) 4025 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4026 The size of this array is equal to the number of local rows, i.e 'm'. 
         For matrices that will be factored, you must leave room for (and set)
         the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an m x n matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.
4181 4182 The format which is used for the sparse matrix input, is equivalent to a 4183 row-major ordering.. i.e for the following matrix, the input data expected is 4184 as shown 4185 4186 $ 1 0 0 4187 $ 2 0 3 P0 4188 $ ------- 4189 $ 4 5 6 P1 4190 $ 4191 $ Process0 [P0]: rows_owned=[0,1] 4192 $ i = {0,1,3} [size = nrow+1 = 2+1] 4193 $ j = {0,0,2} [size = 3] 4194 $ v = {1,2,3} [size = 3] 4195 $ 4196 $ Process1 [P1]: rows_owned=[2] 4197 $ i = {0,3} [size = nrow+1 = 1+1] 4198 $ j = {0,1,2} [size = 3] 4199 $ v = {4,5,6} [size = 3] 4200 4201 .keywords: matrix, aij, compressed row, sparse, parallel 4202 4203 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4204 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4205 @*/ 4206 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4207 { 4208 PetscErrorCode ierr; 4209 4210 PetscFunctionBegin; 4211 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4212 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4213 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4214 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4215 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4216 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4217 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4218 PetscFunctionReturn(0); 4219 } 4220 4221 /*@C 4222 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4223 (the default parallel PETSc format). For good matrix assembly performance 4224 the user should preallocate the matrix storage by setting the parameters 4225 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4226 performance can be increased by more than a factor of 50. 4227 4228 Collective on MPI_Comm 4229 4230 Input Parameters: 4231 + comm - MPI communicator 4232 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4233 This value should be the same as the local size used in creating the 4234 y vector for the matrix-vector product y = Ax. 4235 . n - This value should be the same as the local size used in creating the 4236 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4237 calculated if N is given) For square matrices n is almost always m. 4238 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4239 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4240 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4241 (same value is used for all local rows) 4242 . d_nnz - array containing the number of nonzeros in the various rows of the 4243 DIAGONAL portion of the local submatrix (possibly different for each row) 4244 or NULL, if d_nz is used to specify the nonzero structure. 4245 The size of this array is equal to the number of local rows, i.e 'm'. 4246 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4247 submatrix (same value is used for all local rows). 4248 - o_nnz - array containing the number of nonzeros in the various rows of the 4249 OFF-DIAGONAL portion of the local submatrix (possibly different for 4250 each row) or NULL, if o_nz is used to specify the nonzero 4251 structure. 
      The size of this array is equal to the number
      of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   belonging to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A, m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows.
This division can be shown 4327 as follows 4328 4329 .vb 4330 1 2 0 | 0 3 0 | 0 4 4331 Proc0 0 5 6 | 7 0 0 | 8 0 4332 9 0 10 | 11 0 0 | 12 0 4333 ------------------------------------- 4334 13 0 14 | 15 16 17 | 0 0 4335 Proc1 0 18 0 | 19 20 21 | 0 0 4336 0 0 0 | 22 23 0 | 24 0 4337 ------------------------------------- 4338 Proc2 25 26 27 | 0 0 28 | 29 0 4339 30 0 0 | 31 32 33 | 0 34 4340 .ve 4341 4342 This can be represented as a collection of submatrices as 4343 4344 .vb 4345 A B C 4346 D E F 4347 G H I 4348 .ve 4349 4350 where the submatrices A,B,C are owned by proc0, D,E,F are 4351 owned by proc1, and G,H,I are owned by proc2. 4352 4353 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4354 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4355 The 'M','N' parameters are 8,8, and have the same values on all procs. 4356 4357 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4358 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4359 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4360 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4361 part as separate SeqAIJ matrices; for example, proc1 will store [E] as one SeqAIJ 4362 matrix and [DF] as another SeqAIJ matrix. 4363 4364 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4365 allocated for every row of the local diagonal submatrix, and o_nz 4366 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4367 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4368 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4369 In this case, the values of d_nz,o_nz are 4370 .vb 4371 proc0 : d_nz = 2, o_nz = 2 4372 proc1 : d_nz = 3, o_nz = 2 4373 proc2 : d_nz = 1, o_nz = 4 4374 .ve 4375 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4376 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4377 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4378 34 values. 4379 4380 When the d_nnz, o_nnz parameters are specified, the storage is specified 4381 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4382 In the above case the values for d_nnz,o_nnz are 4383 .vb 4384 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4385 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4386 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4387 .ve 4388 Here the space allocated is the sum of all the above values, i.e., 34, and 4389 hence the preallocation is exact.
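   As a minimal calling sketch for proc0 of the example above (the names A and ierr are illustrative and not part of the example), each process would make the corresponding call with its own local sizes and per-row arrays:
.vb
   Mat      A;
   PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};   /* the proc0 values shown above */
   ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
   /* ... insert entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve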
4390 4391 Level: intermediate 4392 4393 .keywords: matrix, aij, compressed row, sparse, parallel 4394 4395 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4396 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4397 @*/ 4398 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4399 { 4400 PetscErrorCode ierr; 4401 PetscMPIInt size; 4402 4403 PetscFunctionBegin; 4404 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4405 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4406 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4407 if (size > 1) { 4408 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4409 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4410 } else { 4411 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4412 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4413 } 4414 PetscFunctionReturn(0); 4415 } 4416 4417 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4418 { 4419 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4420 PetscBool flg; 4421 PetscErrorCode ierr; 4422 4423 PetscFunctionBegin; 4424 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4425 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4426 if (Ad) *Ad = a->A; 4427 if (Ao) *Ao = a->B; 4428 if (colmap) *colmap = a->garray; 4429 PetscFunctionReturn(0); 4430 } 4431 4432 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4433 { 4434 PetscErrorCode ierr; 4435 PetscInt m,N,i,rstart,nnz,Ii; 4436 PetscInt *indx; 4437 PetscScalar *values; 4438 4439 PetscFunctionBegin; 4440 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4441 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4442 PetscInt *dnz,*onz,sum,bs,cbs; 4443 4444 if (n == PETSC_DECIDE) { 4445 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4446 } 4447 /* Check sum(n) = N */ 4448 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4449 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4450 4451 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4452 rstart -= m; 4453 4454 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4455 for (i=0; i<m; i++) { 4456 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4457 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4458 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4459 } 4460 4461 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4462 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4463 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4464 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4465 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4466 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4467 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4468 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4469 } 4470 4471 /* numeric phase */ 4472 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4473 for (i=0; i<m; i++) { 4474 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4475 Ii = i + rstart; 4476 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
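    /* give back the row storage borrowed from inmat by MatGetRow_SeqAIJ() before moving on to the next row */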
4477 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4478 } 4479 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4480 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4481 PetscFunctionReturn(0); 4482 } 4483 4484 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4485 { 4486 PetscErrorCode ierr; 4487 PetscMPIInt rank; 4488 PetscInt m,N,i,rstart,nnz; 4489 size_t len; 4490 const PetscInt *indx; 4491 PetscViewer out; 4492 char *name; 4493 Mat B; 4494 const PetscScalar *values; 4495 4496 PetscFunctionBegin; 4497 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4498 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4499 /* Should this be the type of the diagonal block of A? */ 4500 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4501 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4502 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4503 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4504 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4505 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4506 for (i=0; i<m; i++) { 4507 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4508 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4509 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4510 } 4511 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4512 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4513 4514 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4515 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4516 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4517 sprintf(name,"%s.%d",outfile,rank); 4518 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4519 ierr = PetscFree(name);CHKERRQ(ierr); 4520 ierr = MatView(B,out);CHKERRQ(ierr); 4521 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4522 ierr = MatDestroy(&B);CHKERRQ(ierr); 4523 PetscFunctionReturn(0); 4524 } 4525 4526 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4527 { 4528 PetscErrorCode ierr; 4529 Mat_Merge_SeqsToMPI *merge; 4530 PetscContainer container; 4531 4532 PetscFunctionBegin; 4533 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4534 if (container) { 4535 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4536 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4537 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4538 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4539 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4540 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4541 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4542 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4543 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4544 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4545 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4546 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4547 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4548 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4549 ierr = PetscFree(merge);CHKERRQ(ierr); 4550 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4551 } 4552 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4553 PetscFunctionReturn(0); 4554 } 4555 4556 #include <../src/mat/utils/freespace.h> 4557 #include <petscbt.h> 4558 4559 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4560 { 4561 PetscErrorCode ierr; 4562 MPI_Comm comm; 4563 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4564 PetscMPIInt 
size,rank,taga,*len_s; 4565 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4566 PetscInt proc,m; 4567 PetscInt **buf_ri,**buf_rj; 4568 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4569 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4570 MPI_Request *s_waits,*r_waits; 4571 MPI_Status *status; 4572 MatScalar *aa=a->a; 4573 MatScalar **abuf_r,*ba_i; 4574 Mat_Merge_SeqsToMPI *merge; 4575 PetscContainer container; 4576 4577 PetscFunctionBegin; 4578 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4579 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4580 4581 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4582 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4583 4584 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4585 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4586 4587 bi = merge->bi; 4588 bj = merge->bj; 4589 buf_ri = merge->buf_ri; 4590 buf_rj = merge->buf_rj; 4591 4592 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4593 owners = merge->rowmap->range; 4594 len_s = merge->len_s; 4595 4596 /* send and recv matrix values */ 4597 /*-----------------------------*/ 4598 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4599 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4600 4601 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4602 for (proc=0,k=0; proc<size; proc++) { 4603 if (!len_s[proc]) continue; 4604 i = owners[proc]; 4605 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4606 k++; 4607 } 4608 4609 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4610 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4611 ierr = PetscFree(status);CHKERRQ(ierr); 4612 4613 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4614 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4615 4616 /* insert mat values of mpimat */ 4617 /*----------------------------*/ 4618 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4619 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4620 4621 for (k=0; k<merge->nrecv; k++) { 4622 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4623 nrows = *(buf_ri_k[k]); 4624 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4625 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4626 } 4627 4628 /* set values of ba */ 4629 m = merge->rowmap->n; 4630 for (i=0; i<m; i++) { 4631 arow = owners[rank] + i; 4632 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4633 bnzi = bi[i+1] - bi[i]; 4634 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4635 4636 /* add local non-zero vals of this proc's seqmat into ba */ 4637 anzi = ai[arow+1] - ai[arow]; 4638 aj = a->j + ai[arow]; 4639 aa = a->a + ai[arow]; 4640 nextaj = 0; 4641 for (j=0; nextaj<anzi; j++) { 4642 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4643 ba_i[j] += aa[nextaj++]; 4644 } 4645 } 4646 4647 /* add received vals into ba */ 4648 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4649 /* i-th row */ 4650 if (i == *nextrow[k]) { 4651 anzi = *(nextai[k]+1) - *nextai[k]; 4652 aj = buf_rj[k] + *(nextai[k]); 4653 aa = abuf_r[k] + *(nextai[k]); 4654 nextaj = 0; 4655 for (j=0; nextaj<anzi; j++) { 4656 if (*(bj_i + j) == aj[nextaj]) { /* bcol 
== acol */ 4657 ba_i[j] += aa[nextaj++]; 4658 } 4659 } 4660 nextrow[k]++; nextai[k]++; 4661 } 4662 } 4663 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4664 } 4665 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4666 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4667 4668 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4669 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4670 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4671 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4672 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4673 PetscFunctionReturn(0); 4674 } 4675 4676 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4677 { 4678 PetscErrorCode ierr; 4679 Mat B_mpi; 4680 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4681 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4682 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4683 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4684 PetscInt len,proc,*dnz,*onz,bs,cbs; 4685 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4686 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4687 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4688 MPI_Status *status; 4689 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4690 PetscBT lnkbt; 4691 Mat_Merge_SeqsToMPI *merge; 4692 PetscContainer container; 4693 4694 PetscFunctionBegin; 4695 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4696 4697 /* make sure it is a PETSc comm */ 4698 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4699 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4700 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4701 4702 ierr = PetscNew(&merge);CHKERRQ(ierr); 4703 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4704 4705 /* determine row ownership */ 4706 /*---------------------------------------------------------*/ 4707 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4708 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4709 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4710 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4711 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4712 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4713 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4714 4715 m = merge->rowmap->n; 4716 owners = merge->rowmap->range; 4717 4718 /* determine the number of messages to send, their lengths */ 4719 /*---------------------------------------------------------*/ 4720 len_s = merge->len_s; 4721 4722 len = 0; /* length of buf_si[] */ 4723 merge->nsend = 0; 4724 for (proc=0; proc<size; proc++) { 4725 len_si[proc] = 0; 4726 if (proc == rank) { 4727 len_s[proc] = 0; 4728 } else { 4729 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4730 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4731 } 4732 if (len_s[proc]) { 4733 merge->nsend++; 4734 nrows = 0; 4735 for (i=owners[proc]; i<owners[proc+1]; i++) { 4736 if (ai[i+1] > ai[i]) nrows++; 4737 } 4738 len_si[proc] = 2*(nrows+1); 4739 len += len_si[proc]; 4740 } 4741 } 4742 4743 /* determine the number and length of messages to receive for ij-structure */ 4744 /*-------------------------------------------------------------------------*/ 4745 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4746 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4747 4748 /* post the Irecv of j-structure */ 4749 /*-------------------------------*/ 4750 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4751 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4752 4753 /* post the Isend of j-structure */ 4754 /*--------------------------------*/ 4755 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4756 4757 for (proc=0, k=0; proc<size; proc++) { 4758 if (!len_s[proc]) continue; 4759 i = owners[proc]; 4760 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4761 k++; 4762 } 4763 4764 /* receives and sends of j-structure are complete */ 4765 /*------------------------------------------------*/ 4766 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4767 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4768 4769 /* send and recv i-structure */ 4770 /*---------------------------*/ 4771 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4772 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4773 4774 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4775 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4776 for (proc=0,k=0; proc<size; proc++) { 4777 if (!len_s[proc]) continue; 4778 /* form outgoing message for i-structure: 4779 buf_si[0]: nrows to be sent 4780 [1:nrows]: row index (global) 4781 [nrows+1:2*nrows+1]: i-structure index 4782 */ 4783 /*-------------------------------------------*/ 4784 nrows = len_si[proc]/2 - 1; 4785 buf_si_i = buf_si + nrows+1; 4786 buf_si[0] = nrows; 4787 buf_si_i[0] = 0; 4788 nrows = 0; 4789 for (i=owners[proc]; i<owners[proc+1]; i++) { 4790 anzi = ai[i+1] - ai[i]; 4791 if (anzi) { 4792 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4793 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4794 nrows++; 4795 } 4796 } 4797 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4798 k++; 4799 buf_si += len_si[proc]; 4800 } 4801 4802 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4803 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4804 4805 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4806 for (i=0; i<merge->nrecv; i++) { 4807 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4808 } 4809 4810 ierr = PetscFree(len_si);CHKERRQ(ierr); 4811 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4812 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4813 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4814 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4815 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4816 ierr = PetscFree(status);CHKERRQ(ierr); 4817 4818 /* compute a local seq matrix in each processor */ 4819 /*----------------------------------------------*/ 4820 /* allocate bi array and free space for accumulating nonzero column info */ 4821 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4822 bi[0] = 0; 4823 4824 /* create and initialize a linked list */ 4825 nlnk = N+1; 4826 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4827 4828 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4829 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4830 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4831 4832 current_space = free_space; 4833 4834 /* determine symbolic info for each local row */ 4835 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4836 4837 for (k=0; k<merge->nrecv; k++) { 4838 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4839 nrows = *buf_ri_k[k]; 4840 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4841 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4842 } 4843 4844 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4845 len = 0; 4846 for (i=0; i<m; i++) { 4847 bnzi = 0; 4848 /* add local non-zero cols of this proc's seqmat into lnk */ 4849 arow = owners[rank] + i; 4850 anzi = ai[arow+1] - ai[arow]; 4851 aj = a->j + ai[arow]; 4852 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4853 bnzi += nlnk; 4854 /* add received col data into lnk */ 4855 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4856 if (i == *nextrow[k]) { /* i-th row */ 4857 anzi = *(nextai[k]+1) - *nextai[k]; 4858 aj = buf_rj[k] + *nextai[k]; 4859 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4860 bnzi += nlnk; 4861 nextrow[k]++; nextai[k]++; 4862 } 4863 } 4864 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4865 4866 /* if free space is not available, make more free space */ 4867 if (current_space->local_remaining<bnzi) { 4868 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4869 nspacedouble++; 4870 } 4871 /* copy data into free space, then initialize lnk */ 4872 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4873 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4874 4875 current_space->array += bnzi; 4876 current_space->local_used += bnzi; 4877 current_space->local_remaining -= bnzi; 4878 4879 bi[i+1] = bi[i] + bnzi; 4880 } 4881 4882 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4883 4884 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4885 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4886 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4887 4888 /* create symbolic parallel matrix B_mpi */ 4889 /*---------------------------------------*/ 4890 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4891 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4892 if (n==PETSC_DECIDE) { 4893 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4894 } else { 4895 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4896 } 4897 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4898 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4899 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4900 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4901 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4902 4903 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4904 B_mpi->assembled = PETSC_FALSE; 4905 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4906 merge->bi = bi; 4907 merge->bj = bj; 4908 merge->buf_ri = buf_ri; 4909 merge->buf_rj = buf_rj; 4910 merge->coi = NULL; 4911 merge->coj = NULL; 4912 merge->owners_co = NULL; 4913 4914 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4915 4916 /* attach the 
supporting struct to B_mpi for reuse */ 4917 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4918 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4919 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4920 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4921 *mpimat = B_mpi; 4922 4923 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4924 PetscFunctionReturn(0); 4925 } 4926 4927 /*@C 4928 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4929 matrices from each processor 4930 4931 Collective on MPI_Comm 4932 4933 Input Parameters: 4934 + comm - the communicators the parallel matrix will live on 4935 . seqmat - the input sequential matrices 4936 . m - number of local rows (or PETSC_DECIDE) 4937 . n - number of local columns (or PETSC_DECIDE) 4938 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4939 4940 Output Parameter: 4941 . mpimat - the parallel matrix generated 4942 4943 Level: advanced 4944 4945 Notes: 4946 The dimensions of the sequential matrix in each processor MUST be the same. 4947 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4948 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4949 @*/ 4950 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4951 { 4952 PetscErrorCode ierr; 4953 PetscMPIInt size; 4954 4955 PetscFunctionBegin; 4956 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4957 if (size == 1) { 4958 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4959 if (scall == MAT_INITIAL_MATRIX) { 4960 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4961 } else { 4962 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4963 } 4964 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4965 PetscFunctionReturn(0); 4966 } 4967 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4968 if (scall == MAT_INITIAL_MATRIX) { 4969 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4970 } 4971 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4972 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4973 PetscFunctionReturn(0); 4974 } 4975 4976 /*@ 4977 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4978 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4979 with MatGetSize() 4980 4981 Not Collective 4982 4983 Input Parameters: 4984 + A - the matrix 4985 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4986 4987 Output Parameter: 4988 . 
A_loc - the local sequential matrix generated 4989 4990 Level: developer 4991 4992 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4993 4994 @*/ 4995 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4996 { 4997 PetscErrorCode ierr; 4998 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4999 Mat_SeqAIJ *mat,*a,*b; 5000 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5001 MatScalar *aa,*ba,*cam; 5002 PetscScalar *ca; 5003 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5004 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5005 PetscBool match; 5006 MPI_Comm comm; 5007 PetscMPIInt size; 5008 5009 PetscFunctionBegin; 5010 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5011 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5012 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5013 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5014 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5015 5016 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5017 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5018 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5019 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5020 aa = a->a; ba = b->a; 5021 if (scall == MAT_INITIAL_MATRIX) { 5022 if (size == 1) { 5023 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5024 PetscFunctionReturn(0); 5025 } 5026 5027 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5028 ci[0] = 0; 5029 for (i=0; i<am; i++) { 5030 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5031 } 5032 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5033 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5034 k = 0; 5035 for (i=0; i<am; i++) { 5036 ncols_o = bi[i+1] - bi[i]; 5037 ncols_d = ai[i+1] - ai[i]; 5038 /* off-diagonal portion of A */ 5039 for (jo=0; jo<ncols_o; jo++) { 5040 col = cmap[*bj]; 5041 if (col >= cstart) break; 5042 cj[k] = col; bj++; 5043 ca[k++] = *ba++; 5044 } 5045 /* diagonal portion of A */ 5046 for (j=0; j<ncols_d; j++) { 5047 cj[k] = cstart + *aj++; 5048 ca[k++] = *aa++; 5049 } 5050 /* off-diagonal portion of A */ 5051 for (j=jo; j<ncols_o; j++) { 5052 cj[k] = cmap[*bj++]; 5053 ca[k++] = *ba++; 5054 } 5055 } 5056 /* put together the new matrix */ 5057 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5058 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5059 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5060 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5061 mat->free_a = PETSC_TRUE; 5062 mat->free_ij = PETSC_TRUE; 5063 mat->nonew = 0; 5064 } else if (scall == MAT_REUSE_MATRIX) { 5065 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5066 ci = mat->i; cj = mat->j; cam = mat->a; 5067 for (i=0; i<am; i++) { 5068 /* off-diagonal portion of A */ 5069 ncols_o = bi[i+1] - bi[i]; 5070 for (jo=0; jo<ncols_o; jo++) { 5071 col = cmap[*bj]; 5072 if (col >= cstart) break; 5073 *cam++ = *ba++; bj++; 5074 } 5075 /* diagonal portion of A */ 5076 ncols_d = ai[i+1] - ai[i]; 5077 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5078 /* off-diagonal portion of A */ 5079 for (j=jo; j<ncols_o; j++) { 5080 *cam++ = *ba++; bj++; 5081 } 5082 } 5083 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5084 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5085 PetscFunctionReturn(0); 5086 } 5087 5088 /*@C 5089 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5090 5091 Not Collective 5092 5093 Input Parameters: 5094 + A - the matrix 5095 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5096 - row, col - index sets of rows and columns to extract (or NULL) 5097 5098 Output Parameter: 5099 . A_loc - the local sequential matrix generated 5100 5101 Level: developer 5102 5103 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5104 5105 @*/ 5106 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5107 { 5108 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5109 PetscErrorCode ierr; 5110 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5111 IS isrowa,iscola; 5112 Mat *aloc; 5113 PetscBool match; 5114 5115 PetscFunctionBegin; 5116 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5117 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5118 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5119 if (!row) { 5120 start = A->rmap->rstart; end = A->rmap->rend; 5121 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5122 } else { 5123 isrowa = *row; 5124 } 5125 if (!col) { 5126 start = A->cmap->rstart; 5127 cmap = a->garray; 5128 nzA = a->A->cmap->n; 5129 nzB = a->B->cmap->n; 5130 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5131 ncols = 0; 5132 for (i=0; i<nzB; i++) { 5133 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5134 else break; 5135 } 5136 imark = i; 5137 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5138 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5139 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5140 } else { 5141 iscola = *col; 5142 } 5143 if (scall != MAT_INITIAL_MATRIX) { 5144 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5145 aloc[0] = *A_loc; 5146 } 5147 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5148 if (!col) { /* attach global id of condensed columns */ 5149 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5150 } 5151 *A_loc = aloc[0]; 5152 ierr = PetscFree(aloc);CHKERRQ(ierr); 5153 if (!row) { 5154 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5155 } 5156 if (!col) { 5157 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5158 } 5159 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5160 PetscFunctionReturn(0); 5161 } 5162 5163 /*@C 5164 MatGetBrowsOfAcols - Creates a 
SeqAIJ matrix by taking rows of B that correspond to nonzero columns of local A 5165 5166 Collective on Mat 5167 5168 Input Parameters: 5169 + A,B - the matrices in mpiaij format 5170 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5171 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5172 5173 Output Parameters: 5174 + rowb, colb - index sets of rows and columns of B to extract 5175 - B_seq - the sequential matrix generated 5176 5177 Level: developer 5178 5179 @*/ 5180 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5181 { 5182 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5183 PetscErrorCode ierr; 5184 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5185 IS isrowb,iscolb; 5186 Mat *bseq=NULL; 5187 5188 PetscFunctionBegin; 5189 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5190 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5191 } 5192 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5193 5194 if (scall == MAT_INITIAL_MATRIX) { 5195 start = A->cmap->rstart; 5196 cmap = a->garray; 5197 nzA = a->A->cmap->n; 5198 nzB = a->B->cmap->n; 5199 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5200 ncols = 0; 5201 for (i=0; i<nzB; i++) { /* row < local row index */ 5202 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5203 else break; 5204 } 5205 imark = i; 5206 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5207 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5208 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5209 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5210 } else { 5211 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5212 isrowb = *rowb; iscolb = *colb; 5213 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5214 bseq[0] = *B_seq; 5215 } 5216 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5217 *B_seq = bseq[0]; 5218 ierr = PetscFree(bseq);CHKERRQ(ierr); 5219 if (!rowb) { 5220 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5221 } else { 5222 *rowb = isrowb; 5223 } 5224 if (!colb) { 5225 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5226 } else { 5227 *colb = iscolb; 5228 } 5229 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5230 PetscFunctionReturn(0); 5231 } 5232 5233 /* 5234 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that correspond to nonzero columns 5235 of the OFF-DIAGONAL portion of local A 5236 5237 Collective on Mat 5238 5239 Input Parameters: 5240 + A,B - the matrices in mpiaij format 5241 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5242 5243 Output Parameters: 5244 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5245 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5246 .
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5247 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5248 5249 Level: developer 5250 5251 */ 5252 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5253 { 5254 VecScatter_MPI_General *gen_to,*gen_from; 5255 PetscErrorCode ierr; 5256 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5257 Mat_SeqAIJ *b_oth; 5258 VecScatter ctx; 5259 MPI_Comm comm; 5260 PetscMPIInt *rprocs,*sprocs,tag,rank; 5261 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5262 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5263 PetscScalar *b_otha,*bufa,*bufA,*vals; 5264 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5265 MPI_Request *rwaits = NULL,*swaits = NULL; 5266 MPI_Status *sstatus,rstatus; 5267 PetscMPIInt jj,size; 5268 VecScatterType type; 5269 PetscBool mpi1; 5270 5271 PetscFunctionBegin; 5272 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5273 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5274 5275 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5276 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5277 } 5278 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5279 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5280 5281 if (size == 1) { 5282 startsj_s = NULL; 5283 bufa_ptr = NULL; 5284 *B_oth = NULL; 5285 PetscFunctionReturn(0); 5286 } 5287 5288 ctx = a->Mvctx; 5289 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5290 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5291 if (!mpi1) { 5292 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5293 thus create a->Mvctx_mpi1 */ 5294 if (!a->Mvctx_mpi1) { 5295 a->Mvctx_mpi1_flg = PETSC_TRUE; 5296 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5297 } 5298 ctx = a->Mvctx_mpi1; 5299 } 5300 tag = ((PetscObject)ctx)->tag; 5301 5302 gen_to = (VecScatter_MPI_General*)ctx->todata; 5303 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5304 nrecvs = gen_from->n; 5305 nsends = gen_to->n; 5306 5307 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5308 srow = gen_to->indices; /* local row index to be sent */ 5309 sstarts = gen_to->starts; 5310 sprocs = gen_to->procs; 5311 sstatus = gen_to->sstatus; 5312 sbs = gen_to->bs; 5313 rstarts = gen_from->starts; 5314 rprocs = gen_from->procs; 5315 rbs = gen_from->bs; 5316 5317 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5318 if (scall == MAT_INITIAL_MATRIX) { 5319 /* i-array */ 5320 /*---------*/ 5321 /* post receives */ 5322 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5323 for (i=0; i<nrecvs; i++) { 5324 rowlen = rvalues + rstarts[i]*rbs; 5325 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5326 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5327 } 5328 5329 /* pack the outgoing message */ 5330 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5331 5332 sstartsj[0] = 0; 5333 rstartsj[0] = 0; 5334 len = 0; /* total length of j or a array to be sent */ 5335 k = 0; 5336 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5337 for (i=0; i<nsends; i++) { 5338 rowlen = svalues + sstarts[i]*sbs; 5339 
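      /* rowlen points into svalues; the loop below fills it with the number of nonzeros in each locally owned row of B that will be sent to process sprocs[i] */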
nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5340 for (j=0; j<nrows; j++) { 5341 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5342 for (l=0; l<sbs; l++) { 5343 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5344 5345 rowlen[j*sbs+l] = ncols; 5346 5347 len += ncols; 5348 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5349 } 5350 k++; 5351 } 5352 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5353 5354 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5355 } 5356 /* recvs and sends of i-array are completed */ 5357 i = nrecvs; 5358 while (i--) { 5359 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5360 } 5361 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5362 ierr = PetscFree(svalues);CHKERRQ(ierr); 5363 5364 /* allocate buffers for sending j and a arrays */ 5365 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5366 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5367 5368 /* create i-array of B_oth */ 5369 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5370 5371 b_othi[0] = 0; 5372 len = 0; /* total length of j or a array to be received */ 5373 k = 0; 5374 for (i=0; i<nrecvs; i++) { 5375 rowlen = rvalues + rstarts[i]*rbs; 5376 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5377 for (j=0; j<nrows; j++) { 5378 b_othi[k+1] = b_othi[k] + rowlen[j]; 5379 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5380 k++; 5381 } 5382 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5383 } 5384 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5385 5386 /* allocate space for j and a arrrays of B_oth */ 5387 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5388 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5389 5390 /* j-array */ 5391 /*---------*/ 5392 /* post receives of j-array */ 5393 for (i=0; i<nrecvs; i++) { 5394 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5395 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5396 } 5397 5398 /* pack the outgoing message j-array */ 5399 k = 0; 5400 for (i=0; i<nsends; i++) { 5401 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5402 bufJ = bufj+sstartsj[i]; 5403 for (j=0; j<nrows; j++) { 5404 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5405 for (ll=0; ll<sbs; ll++) { 5406 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5407 for (l=0; l<ncols; l++) { 5408 *bufJ++ = cols[l]; 5409 } 5410 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5411 } 5412 } 5413 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5414 } 5415 5416 /* recvs and sends of j-array are completed */ 5417 i = nrecvs; 5418 while (i--) { 5419 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5420 } 5421 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5422 } else if (scall == MAT_REUSE_MATRIX) { 5423 sstartsj = *startsj_s; 5424 rstartsj = *startsj_r; 5425 bufa = *bufa_ptr; 5426 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5427 b_otha = b_oth->a; 5428 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5429 5430 /* a-array */ 5431 /*---------*/ 5432 /* post receives of a-array */ 5433 for (i=0; i<nrecvs; i++) { 5434 nrows = rstartsj[i+1]-rstartsj[i]; /* length 
of the msg received */ 5435 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5436 } 5437 5438 /* pack the outgoing message a-array */ 5439 k = 0; 5440 for (i=0; i<nsends; i++) { 5441 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5442 bufA = bufa+sstartsj[i]; 5443 for (j=0; j<nrows; j++) { 5444 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5445 for (ll=0; ll<sbs; ll++) { 5446 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5447 for (l=0; l<ncols; l++) { 5448 *bufA++ = vals[l]; 5449 } 5450 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5451 } 5452 } 5453 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5454 } 5455 /* recvs and sends of a-array are completed */ 5456 i = nrecvs; 5457 while (i--) { 5458 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5459 } 5460 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5461 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5462 5463 if (scall == MAT_INITIAL_MATRIX) { 5464 /* put together the new matrix */ 5465 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5466 5467 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5468 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5469 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5470 b_oth->free_a = PETSC_TRUE; 5471 b_oth->free_ij = PETSC_TRUE; 5472 b_oth->nonew = 0; 5473 5474 ierr = PetscFree(bufj);CHKERRQ(ierr); 5475 if (!startsj_s || !bufa_ptr) { 5476 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5477 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5478 } else { 5479 *startsj_s = sstartsj; 5480 *startsj_r = rstartsj; 5481 *bufa_ptr = bufa; 5482 } 5483 } 5484 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5485 PetscFunctionReturn(0); 5486 } 5487 5488 /*@C 5489 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5490 5491 Not Collective 5492 5493 Input Parameters: 5494 . A - The matrix in mpiaij format 5495 5496 Output Parameter: 5497 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5498 . 
colmap - A map from global column index to local index into lvec 5499 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5500 5501 Level: developer 5502 5503 @*/ 5504 #if defined(PETSC_USE_CTABLE) 5505 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5506 #else 5507 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5508 #endif 5509 { 5510 Mat_MPIAIJ *a; 5511 5512 PetscFunctionBegin; 5513 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5514 PetscValidPointer(lvec, 2); 5515 PetscValidPointer(colmap, 3); 5516 PetscValidPointer(multScatter, 4); 5517 a = (Mat_MPIAIJ*) A->data; 5518 if (lvec) *lvec = a->lvec; 5519 if (colmap) *colmap = a->colmap; 5520 if (multScatter) *multScatter = a->Mvctx; 5521 PetscFunctionReturn(0); 5522 } 5523 5524 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5525 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5526 #if defined(PETSC_HAVE_MKL_SPARSE) 5527 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5528 #endif 5529 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5530 #if defined(PETSC_HAVE_ELEMENTAL) 5531 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5532 #endif 5533 #if defined(PETSC_HAVE_HYPRE) 5534 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5535 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5536 #endif 5537 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5538 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5539 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5540 5541 /* 5542 Computes (B'*A')' since computing B*A directly is untenable 5543 5544 n p p 5545 ( ) ( ) ( ) 5546 m ( A ) * n ( B ) = m ( C ) 5547 ( ) ( ) ( ) 5548 5549 */ 5550 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5551 { 5552 PetscErrorCode ierr; 5553 Mat At,Bt,Ct; 5554 5555 PetscFunctionBegin; 5556 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5557 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5558 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5559 ierr = MatDestroy(&At);CHKERRQ(ierr); 5560 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5561 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5562 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5563 PetscFunctionReturn(0); 5564 } 5565 5566 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5567 { 5568 PetscErrorCode ierr; 5569 PetscInt m=A->rmap->n,n=B->cmap->n; 5570 Mat Cmat; 5571 5572 PetscFunctionBegin; 5573 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5574 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5575 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5576 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5577 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5578 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5579 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5580 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5581 5582 Cmat->ops->matmultnumeric = 
MatMatMultNumeric_MPIDense_MPIAIJ; 5583 5584 *C = Cmat; 5585 PetscFunctionReturn(0); 5586 } 5587 5588 /* ----------------------------------------------------------------*/ 5589 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5590 { 5591 PetscErrorCode ierr; 5592 5593 PetscFunctionBegin; 5594 if (scall == MAT_INITIAL_MATRIX) { 5595 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5596 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5597 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5598 } 5599 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5600 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5601 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5602 PetscFunctionReturn(0); 5603 } 5604 5605 /*MC 5606 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5607 5608 Options Database Keys: 5609 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5610 5611 Level: beginner 5612 5613 .seealso: MatCreateAIJ() 5614 M*/ 5615 5616 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5617 { 5618 Mat_MPIAIJ *b; 5619 PetscErrorCode ierr; 5620 PetscMPIInt size; 5621 5622 PetscFunctionBegin; 5623 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5624 5625 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5626 B->data = (void*)b; 5627 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5628 B->assembled = PETSC_FALSE; 5629 B->insertmode = NOT_SET_VALUES; 5630 b->size = size; 5631 5632 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5633 5634 /* build cache for off array entries formed */ 5635 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5636 5637 b->donotstash = PETSC_FALSE; 5638 b->colmap = 0; 5639 b->garray = 0; 5640 b->roworiented = PETSC_TRUE; 5641 5642 /* stuff used for matrix vector multiply */ 5643 b->lvec = NULL; 5644 b->Mvctx = NULL; 5645 5646 /* stuff for MatGetRow() */ 5647 b->rowindices = 0; 5648 b->rowvalues = 0; 5649 b->getrowactive = PETSC_FALSE; 5650 5651 /* flexible pointer used in CUSP/CUSPARSE classes */ 5652 b->spptr = NULL; 5653 5654 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5655 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5656 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5657 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5658 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5659 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5660 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5661 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5662 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5663 #if defined(PETSC_HAVE_MKL_SPARSE) 5664 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5665 #endif 5666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5668 #if defined(PETSC_HAVE_ELEMENTAL) 5669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5670 #endif 5671 #if defined(PETSC_HAVE_HYPRE) 5672 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5673 #endif 5674 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5676 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5677 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5678 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5679 #if defined(PETSC_HAVE_HYPRE) 5680 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5681 #endif 5682 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5683 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5684 PetscFunctionReturn(0); 5685 } 5686 5687 /*@C 5688 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5689 and "off-diagonal" part of the matrix in CSR format. 5690 5691 Collective on MPI_Comm 5692 5693 Input Parameters: 5694 + comm - MPI communicator 5695 . m - number of local rows (Cannot be PETSC_DECIDE) 5696 . n - This value should be the same as the local size used in creating the 5697 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5698 calculated if N is given) For square matrices n is almost always m. 5699 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5700 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5701 . i - row indices for "diagonal" portion of matrix 5702 . j - column indices 5703 . a - matrix values 5704 . oi - row indices for "off-diagonal" portion of matrix 5705 . oj - column indices 5706 - oa - matrix values 5707 5708 Output Parameter: 5709 . mat - the matrix 5710 5711 Level: advanced 5712 5713 Notes: 5714 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5715 must free the arrays once the matrix has been destroyed and not before. 5716 5717 The i and j indices are 0 based 5718 5719 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5720 5721 This sets local rows and cannot be used to set off-processor values. 5722 5723 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5724 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5725 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 5726 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5727 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5728 communication if it is known that only local entries will be set. 5729 5730 .keywords: matrix, aij, compressed row, sparse, parallel 5731 5732 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5733 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5734 @*/ 5735 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5736 { 5737 PetscErrorCode ierr; 5738 Mat_MPIAIJ *maij; 5739 5740 PetscFunctionBegin; 5741 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5742 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5743 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5744 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5745 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5746 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5747 maij = (Mat_MPIAIJ*) (*mat)->data; 5748 5749 (*mat)->preallocated = PETSC_TRUE; 5750 5751 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5752 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5753 5754 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5755 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5756 5757 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5758 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5759 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5760 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5761 5762 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5763 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5764 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5765 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5766 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5767 PetscFunctionReturn(0); 5768 } 5769 5770 /* 5771 Special version for direct calls from Fortran 5772 */ 5773 #include <petsc/private/fortranimpl.h> 5774 5775 /* Change these macros so can be used in void function */ 5776 #undef CHKERRQ 5777 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5778 #undef SETERRQ2 5779 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5780 #undef SETERRQ3 5781 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5782 #undef SETERRQ 5783 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5784 5785 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5786 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5787 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5788 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5789 #else 5790 #endif 5791 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5792 { 5793 Mat mat = *mmat; 5794 PetscInt m = *mm, n = *mn; 5795 InsertMode addv = 

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* the extra braces around SETERRQ2() are required for --with-errorchecking=0 because of the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
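
/*
   Illustrative sketch (added for exposition, not part of the original source): the routine above
   is the Fortran direct-call counterpart of MatSetValues() specialized for MATMPIAIJ. For
   comparison, the assembly style recommended in the MatCreateMPIAIJWithSplitArrays() notes,
   MatSetValues() combined with MAT_NO_OFF_PROC_ENTRIES when only locally owned rows are set,
   might look like the following. The helper name, preallocation counts, and tridiagonal data
   are placeholders.
*/
#if 0 /* example only, not compiled as part of this file */
static PetscErrorCode ExampleAssembleWithSetValues(MPI_Comm comm,PetscInt mlocal,PetscInt Nglobal,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       row,rstart,rend,col;
  PetscScalar    two = 2.0,minusone = -1.0;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mlocal,mlocal,Nglobal,Nglobal);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,3,NULL,1,NULL);CHKERRQ(ierr); /* placeholder nonzero counts */
  /* each rank sets only rows it owns, so off-process stashing/communication can be disabled */
  ierr = MatSetOption(*A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (row = rstart; row < rend; row++) {
    ierr = MatSetValues(*A,1,&row,1,&row,&two,INSERT_VALUES);CHKERRQ(ierr);
    if (row > 0)         {col = row-1; ierr = MatSetValues(*A,1,&row,1,&col,&minusone,INSERT_VALUES);CHKERRQ(ierr);}
    if (row < Nglobal-1) {col = row+1; ierr = MatSetValues(*A,1,&row,1,&col,&minusone,INSERT_VALUES);CHKERRQ(ierr);}
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif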