#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type also
    automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
114 } 115 ok2:; 116 } 117 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 122 { 123 PetscErrorCode ierr; 124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 125 PetscBool cong; 126 127 PetscFunctionBegin; 128 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 129 if (Y->assembled && cong) { 130 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 131 } else { 132 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 133 } 134 PetscFunctionReturn(0); 135 } 136 137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 138 { 139 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 140 PetscErrorCode ierr; 141 PetscInt i,rstart,nrows,*rows; 142 143 PetscFunctionBegin; 144 *zrows = NULL; 145 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 146 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 147 for (i=0; i<nrows; i++) rows[i] += rstart; 148 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 153 { 154 PetscErrorCode ierr; 155 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 156 PetscInt i,n,*garray = aij->garray; 157 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 158 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 159 PetscReal *work; 160 161 PetscFunctionBegin; 162 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 163 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 164 if (type == NORM_2) { 165 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 166 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 167 } 168 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 169 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 170 } 171 } else if (type == NORM_1) { 172 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 173 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 174 } 175 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 176 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 177 } 178 } else if (type == NORM_INFINITY) { 179 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 180 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 181 } 182 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 183 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 184 } 185 186 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 187 if (type == NORM_INFINITY) { 188 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 189 } else { 190 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 191 } 192 ierr = PetscFree(work);CHKERRQ(ierr); 193 if (type == NORM_2) { 194 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 195 } 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 200 { 201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 202 IS sis,gis; 203 PetscErrorCode ierr; 204 const PetscInt *isis,*igis; 205 PetscInt n,*iis,nsis,ngis,rstart,i; 206 207 PetscFunctionBegin; 208 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 209 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 210 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 211 ierr = 
ISGetSize(sis,&nsis);CHKERRQ(ierr); 212 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 213 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 214 215 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 216 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 217 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 218 n = ngis + nsis; 219 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 220 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 221 for (i=0; i<n; i++) iis[i] += rstart; 222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 223 224 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 225 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 226 ierr = ISDestroy(&sis);CHKERRQ(ierr); 227 ierr = ISDestroy(&gis);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 /* 232 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 233 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 234 235 Only for square matrices 236 237 Used by a preconditioner, hence PETSC_EXTERN 238 */ 239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 240 { 241 PetscMPIInt rank,size; 242 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 243 PetscErrorCode ierr; 244 Mat mat; 245 Mat_SeqAIJ *gmata; 246 PetscMPIInt tag; 247 MPI_Status status; 248 PetscBool aij; 249 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 254 if (!rank) { 255 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 256 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 257 } 258 if (reuse == MAT_INITIAL_MATRIX) { 259 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 260 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 261 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 262 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 263 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 264 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 265 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 266 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 267 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 268 269 rowners[0] = 0; 270 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 271 rstart = rowners[rank]; 272 rend = rowners[rank+1]; 273 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 274 if (!rank) { 275 gmata = (Mat_SeqAIJ*) gmat->data; 276 /* send row lengths to all processors */ 277 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 278 for (i=1; i<size; i++) { 279 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 280 } 281 /* determine number diagonal and off-diagonal counts */ 282 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 283 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 284 jj = 0; 285 for (i=0; i<m; i++) { 286 for (j=0; j<dlens[i]; j++) { 287 if (gmata->j[jj] < rstart) ld[i]++; 288 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 289 jj++; 290 } 291 } 292 /* send column indices to other processes */ 293 for (i=1; i<size; i++) { 294 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 295 ierr = 
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 296 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 } 298 299 /* send numerical values to other processes */ 300 for (i=1; i<size; i++) { 301 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 302 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 303 } 304 gmataa = gmata->a; 305 gmataj = gmata->j; 306 307 } else { 308 /* receive row lengths */ 309 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* receive column indices */ 311 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 312 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 313 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 314 /* determine number diagonal and off-diagonal counts */ 315 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 316 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 317 jj = 0; 318 for (i=0; i<m; i++) { 319 for (j=0; j<dlens[i]; j++) { 320 if (gmataj[jj] < rstart) ld[i]++; 321 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 322 jj++; 323 } 324 } 325 /* receive numerical values */ 326 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 327 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 328 } 329 /* set preallocation */ 330 for (i=0; i<m; i++) { 331 dlens[i] -= olens[i]; 332 } 333 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 334 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 335 336 for (i=0; i<m; i++) { 337 dlens[i] += olens[i]; 338 } 339 cnt = 0; 340 for (i=0; i<m; i++) { 341 row = rstart + i; 342 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 343 cnt += dlens[i]; 344 } 345 if (rank) { 346 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 347 } 348 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 349 ierr = PetscFree(rowners);CHKERRQ(ierr); 350 351 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 352 353 *inmat = mat; 354 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 355 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 356 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 357 mat = *inmat; 358 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 359 if (!rank) { 360 /* send numerical values to other processes */ 361 gmata = (Mat_SeqAIJ*) gmat->data; 362 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 363 gmataa = gmata->a; 364 for (i=1; i<size; i++) { 365 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 366 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 367 } 368 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 369 } else { 370 /* receive numerical values from process 0*/ 371 nz = Ad->nz + Ao->nz; 372 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 373 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 374 } 375 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 376 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 377 ad = Ad->a; 378 ao = Ao->a; 379 if (mat->rmap->n) { 380 i = 0; 381 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 for (i=1; i<mat->rmap->n; i++) { 
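        /* each pass copies the right off-diagonal values of the previous row followed by the left off-diagonal values
           of this row into B, then this row's diagonal-block values into A; ld[] holds the number of entries lying to
           the left of the diagonal block in each local row */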
385 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 386 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 387 } 388 i--; 389 if (mat->rmap->n) { 390 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 391 } 392 if (rank) { 393 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 394 } 395 } 396 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 398 PetscFunctionReturn(0); 399 } 400 401 /* 402 Local utility routine that creates a mapping from the global column 403 number to the local number in the off-diagonal part of the local 404 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 405 a slightly higher hash table cost; without it it is not scalable (each processor 406 has an order N integer array but is fast to acess. 407 */ 408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt n = aij->B->cmap->n,i; 413 414 PetscFunctionBegin; 415 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 416 #if defined(PETSC_USE_CTABLE) 417 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 418 for (i=0; i<n; i++) { 419 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 420 } 421 #else 422 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 423 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 424 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 425 #endif 426 PetscFunctionReturn(0); 427 } 428 429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 430 { \ 431 if (col <= lastcol1) low1 = 0; \ 432 else high1 = nrow1; \ 433 lastcol1 = col;\ 434 while (high1-low1 > 5) { \ 435 t = (low1+high1)/2; \ 436 if (rp1[t] > col) high1 = t; \ 437 else low1 = t; \ 438 } \ 439 for (_i=low1; _i<high1; _i++) { \ 440 if (rp1[_i] > col) break; \ 441 if (rp1[_i] == col) { \ 442 if (addv == ADD_VALUES) ap1[_i] += value; \ 443 else ap1[_i] = value; \ 444 goto a_noinsert; \ 445 } \ 446 } \ 447 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 448 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 449 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 450 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 451 N = nrow1++ - 1; a->nz++; high1++; \ 452 /* shift up all the later entries in this row */ \ 453 for (ii=N; ii>=_i; ii--) { \ 454 rp1[ii+1] = rp1[ii]; \ 455 ap1[ii+1] = ap1[ii]; \ 456 } \ 457 rp1[_i] = col; \ 458 ap1[_i] = value; \ 459 A->nonzerostate++;\ 460 a_noinsert: ; \ 461 ailen[row] = nrow1; \ 462 } 463 464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 465 { \ 466 if (col <= lastcol2) low2 = 0; \ 467 else high2 = nrow2; \ 468 lastcol2 = col; \ 469 while (high2-low2 > 5) { \ 470 t = (low2+high2)/2; \ 471 if (rp2[t] > col) high2 = t; \ 472 else low2 = t; \ 473 } \ 474 for (_i=low2; _i<high2; _i++) { \ 475 if (rp2[_i] > col) break; \ 476 if (rp2[_i] == col) { \ 477 if (addv == ADD_VALUES) ap2[_i] += value; \ 478 else ap2[_i] 
= value; \ 479 goto b_noinsert; \ 480 } \ 481 } \ 482 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 484 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 485 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 486 N = nrow2++ - 1; b->nz++; high2++; \ 487 /* shift up all the later entries in this row */ \ 488 for (ii=N; ii>=_i; ii--) { \ 489 rp2[ii+1] = rp2[ii]; \ 490 ap2[ii+1] = ap2[ii]; \ 491 } \ 492 rp2[_i] = col; \ 493 ap2[_i] = value; \ 494 B->nonzerostate++; \ 495 b_noinsert: ; \ 496 bilen[row] = nrow2; \ 497 } 498 499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 500 { 501 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 502 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 503 PetscErrorCode ierr; 504 PetscInt l,*garray = mat->garray,diag; 505 506 PetscFunctionBegin; 507 /* code only works for square matrices A */ 508 509 /* find size of row to the left of the diagonal part */ 510 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 511 row = row - diag; 512 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 513 if (garray[b->j[b->i[row]+l]] > diag) break; 514 } 515 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* diagonal part */ 518 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 519 520 /* right of diagonal part */ 521 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 526 { 527 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 528 PetscScalar value; 529 PetscErrorCode ierr; 530 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 531 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 532 PetscBool roworiented = aij->roworiented; 533 534 /* Some Variables required in the macro */ 535 Mat A = aij->A; 536 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 537 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 538 MatScalar *aa = a->a; 539 PetscBool ignorezeroentries = a->ignorezeroentries; 540 Mat B = aij->B; 541 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 542 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 543 MatScalar *ba = b->a; 544 545 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 546 PetscInt nonew; 547 MatScalar *ap1,*ap2; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (im[i] < 0) continue; 552 #if defined(PETSC_USE_DEBUG) 553 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 554 #endif 555 if (im[i] >= rstart && im[i] < rend) { 556 row = im[i] - rstart; 557 lastcol1 = -1; 558 rp1 = aj + ai[row]; 559 ap1 = aa + ai[row]; 560 rmax1 = aimax[row]; 561 nrow1 = ailen[row]; 562 low1 = 0; 563 high1 = nrow1; 564 lastcol2 = -1; 565 rp2 = bj + bi[row]; 566 ap2 = ba + bi[row]; 567 rmax2 = bimax[row]; 568 nrow2 = bilen[row]; 569 low2 = 0; 570 high2 = nrow2; 571 572 for (j=0; j<n; j++) { 573 if (roworiented) value = v[i*n+j]; 574 else value = v[i+j*m]; 
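        /* columns in [cstart,cend) are stored in the diagonal block A with a local column index; all other columns go
           into the off-diagonal block B, translated through colmap to B's compact local numbering when the matrix was
           previously assembled and kept as global indices otherwise */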
575 if (in[j] >= cstart && in[j] < cend) { 576 col = in[j] - cstart; 577 nonew = a->nonew; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 579 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 580 } else if (in[j] < 0) continue; 581 #if defined(PETSC_USE_DEBUG) 582 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 583 #endif 584 else { 585 if (mat->was_assembled) { 586 if (!aij->colmap) { 587 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 #if defined(PETSC_USE_CTABLE) 590 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 591 col--; 592 #else 593 col = aij->colmap[in[j]] - 1; 594 #endif 595 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 596 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 597 col = in[j]; 598 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 599 B = aij->B; 600 b = (Mat_SeqAIJ*)B->data; 601 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 602 rp2 = bj + bi[row]; 603 ap2 = ba + bi[row]; 604 rmax2 = bimax[row]; 605 nrow2 = bilen[row]; 606 low2 = 0; 607 high2 = nrow2; 608 bm = aij->B->rmap->n; 609 ba = b->a; 610 } else if (col < 0) { 611 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 612 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 613 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } 615 } else col = in[j]; 616 nonew = b->nonew; 617 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 618 } 619 } 620 } else { 621 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 622 if (!aij->donotstash) { 623 mat->assembled = PETSC_FALSE; 624 if (roworiented) { 625 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 626 } else { 627 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 628 } 629 } 630 } 631 } 632 PetscFunctionReturn(0); 633 } 634 635 /* 636 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 637 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 638 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
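
    As a small illustration (with made-up numbers): if this process owns global columns 4..7 (cstart = 4, cend = 8)
    and one row of the incoming CSR data has columns {1, 5, 9}, then the diagonal part receives the single local
    column {1} (= 5 - cstart) while the off-diagonal part keeps the global columns {1, 9}, so ailen and bilen for
    that row become 1 and 2 respectively.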
639 */ 640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 641 { 642 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 643 Mat A = aij->A; /* diagonal part of the matrix */ 644 Mat B = aij->B; /* offdiagonal part of the matrix */ 645 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 646 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 647 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 648 PetscInt *ailen = a->ilen,*aj = a->j; 649 PetscInt *bilen = b->ilen,*bj = b->j; 650 PetscInt am = aij->A->rmap->n,i,j; 651 PetscInt diag_so_far = 0,dnz; 652 PetscInt offd_so_far = 0,onz; 653 654 PetscFunctionBegin; 655 /* Iterate over all rows of the matrix */ 656 for (j=0; j<am; j++) { 657 dnz = onz = 0; 658 /* Iterate over all non-zero columns of the current row */ 659 for (i=0; i<mat_i[j+1]-mat_i[j]; i++) { 660 col = i + mat_i[j]; 661 /* If column is in the diagonal */ 662 if (mat_j[col] >= cstart && mat_j[col] < cend) { 663 aj[diag_so_far++] = mat_j[col] - cstart; 664 dnz++; 665 } else { /* off-diagonal entries */ 666 bj[offd_so_far++] = mat_j[col]; 667 onz++; 668 } 669 } 670 ailen[j] = dnz; 671 bilen[j] = onz; 672 } 673 PetscFunctionReturn(0); 674 } 675 676 /* 677 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 678 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 679 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 680 */ 681 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[], 682 const PetscInt full_diag_i[],const PetscInt full_offd_i[]) 683 { 684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 685 Mat A = aij->A; /* diagonal part of the matrix */ 686 Mat B = aij->B; /* offdiagonal part of the matrix */ 687 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 688 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 689 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 690 PetscInt *ailen = a->ilen,*aj = a->j; 691 PetscInt *bilen = b->ilen,*bj = b->j; 692 PetscInt am = aij->A->rmap->n,i,j; 693 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 694 PetscScalar *aa = a->a,*ba = b->a; 695 696 PetscFunctionBegin; 697 /* Iterate over all rows of the matrix */ 698 for (j=0; j<am; j++) { 699 dnz_row = onz_row = 0; 700 /* Iterate over all non-zero columns of the current row */ 701 for (i=0; i<mat_i[j+1]-mat_i[j]; i++) { 702 col = i + mat_i[j]; 703 rowstart_offd = full_offd_i[j]; 704 rowstart_diag = full_diag_i[j]; 705 /* If column is in the diagonal */ 706 if (mat_j[col] >= cstart && mat_j[col] < cend) { 707 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 708 aa[rowstart_diag+dnz_row] = mat_a[col]; 709 dnz_row++; 710 } else { /* off-diagonal entries */ 711 bj[rowstart_offd+onz_row] = mat_j[col]; 712 ba[rowstart_offd+onz_row] = mat_a[col]; 713 onz_row++; 714 } 715 } 716 ailen[j] = dnz_row; 717 bilen[j] = onz_row; 718 } 719 PetscFunctionReturn(0); 720 } 721 722 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 723 { 724 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 725 PetscErrorCode ierr; 726 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 727 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 728 729 PetscFunctionBegin; 730 for (i=0; i<m; i++) { 731 if (idxm[i] < 0) continue; /* 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 732 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 733 if (idxm[i] >= rstart && idxm[i] < rend) { 734 row = idxm[i] - rstart; 735 for (j=0; j<n; j++) { 736 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 737 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 738 if (idxn[j] >= cstart && idxn[j] < cend) { 739 col = idxn[j] - cstart; 740 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 741 } else { 742 if (!aij->colmap) { 743 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 744 } 745 #if defined(PETSC_USE_CTABLE) 746 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 747 col--; 748 #else 749 col = aij->colmap[idxn[j]] - 1; 750 #endif 751 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 752 else { 753 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 754 } 755 } 756 } 757 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 758 } 759 PetscFunctionReturn(0); 760 } 761 762 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 763 764 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 765 { 766 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 767 PetscErrorCode ierr; 768 PetscInt nstash,reallocs; 769 770 PetscFunctionBegin; 771 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 772 773 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 774 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 775 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 776 PetscFunctionReturn(0); 777 } 778 779 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 780 { 781 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 782 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 783 PetscErrorCode ierr; 784 PetscMPIInt n; 785 PetscInt i,j,rstart,ncols,flg; 786 PetscInt *row,*col; 787 PetscBool other_disassembled; 788 PetscScalar *val; 789 790 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 791 792 PetscFunctionBegin; 793 if (!aij->donotstash && !mat->nooffprocentries) { 794 while (1) { 795 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 796 if (!flg) break; 797 798 for (i=0; i<n; ) { 799 /* Now identify the consecutive vals belonging to the same row */ 800 for (j=i,rstart=row[j]; j<n; j++) { 801 if (row[j] != rstart) break; 802 } 803 if (j < n) ncols = j-i; 804 else ncols = n-i; 805 /* Now assemble all these values with a single function call */ 806 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 807 808 i = j; 809 } 810 } 811 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 812 } 813 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 814 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 815 816 /* determine if any processor has disassembled, if so we must 817 also disassemble ourselfs, in order that we may reassemble. 
*/ 818 /* 819 if nonzero structure of submatrix B cannot change then we know that 820 no processor disassembled thus we can skip this stuff 821 */ 822 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 823 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 824 if (mat->was_assembled && !other_disassembled) { 825 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 826 } 827 } 828 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 829 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 830 } 831 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 832 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 833 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 834 835 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 836 837 aij->rowvalues = 0; 838 839 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 840 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 841 842 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 843 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 844 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 845 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 846 } 847 PetscFunctionReturn(0); 848 } 849 850 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 851 { 852 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 853 PetscErrorCode ierr; 854 855 PetscFunctionBegin; 856 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 857 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 858 PetscFunctionReturn(0); 859 } 860 861 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 862 { 863 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 864 PetscInt *lrows; 865 PetscInt r, len; 866 PetscBool cong; 867 PetscErrorCode ierr; 868 869 PetscFunctionBegin; 870 /* get locally owned rows */ 871 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 872 /* fix right hand side if needed */ 873 if (x && b) { 874 const PetscScalar *xx; 875 PetscScalar *bb; 876 877 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 878 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 879 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 880 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 881 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 882 } 883 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 884 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 885 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 886 if ((diag != 0.0) && cong) { 887 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 888 } else if (diag != 0.0) { 889 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 890 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 891 for (r = 0; r < len; ++r) { 892 const PetscInt row = lrows[r] + A->rmap->rstart; 893 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 894 } 895 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 896 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 897 } else { 898 ierr = MatZeroRows(mat->A, len, lrows, 0.0, 
NULL, NULL);CHKERRQ(ierr); 899 } 900 ierr = PetscFree(lrows);CHKERRQ(ierr); 901 902 /* only change matrix nonzero state if pattern was allowed to be changed */ 903 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 904 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 905 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 906 } 907 PetscFunctionReturn(0); 908 } 909 910 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 911 { 912 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 913 PetscErrorCode ierr; 914 PetscMPIInt n = A->rmap->n; 915 PetscInt i,j,r,m,p = 0,len = 0; 916 PetscInt *lrows,*owners = A->rmap->range; 917 PetscSFNode *rrows; 918 PetscSF sf; 919 const PetscScalar *xx; 920 PetscScalar *bb,*mask; 921 Vec xmask,lmask; 922 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 923 const PetscInt *aj, *ii,*ridx; 924 PetscScalar *aa; 925 926 PetscFunctionBegin; 927 /* Create SF where leaves are input rows and roots are owned rows */ 928 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 929 for (r = 0; r < n; ++r) lrows[r] = -1; 930 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 931 for (r = 0; r < N; ++r) { 932 const PetscInt idx = rows[r]; 933 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 934 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 935 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 936 } 937 rrows[r].rank = p; 938 rrows[r].index = rows[r] - owners[p]; 939 } 940 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 941 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 942 /* Collect flags for rows to be zeroed */ 943 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 944 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 945 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 946 /* Compress and put in row numbers */ 947 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 948 /* zero diagonal part of matrix */ 949 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 950 /* handle off diagonal part of matrix */ 951 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 952 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 953 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 954 for (i=0; i<len; i++) bb[lrows[i]] = 1; 955 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 956 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 957 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 958 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 959 if (x) { 960 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 961 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 962 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 963 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 964 } 965 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 966 /* remove zeroed rows of off diagonal matrix */ 967 ii = aij->i; 968 for (i=0; i<len; i++) { 969 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 970 } 971 /* loop over all elements of off process part of matrix zeroing removed columns*/ 972 if 
(aij->compressedrow.use) { 973 m = aij->compressedrow.nrows; 974 ii = aij->compressedrow.i; 975 ridx = aij->compressedrow.rindex; 976 for (i=0; i<m; i++) { 977 n = ii[i+1] - ii[i]; 978 aj = aij->j + ii[i]; 979 aa = aij->a + ii[i]; 980 981 for (j=0; j<n; j++) { 982 if (PetscAbsScalar(mask[*aj])) { 983 if (b) bb[*ridx] -= *aa*xx[*aj]; 984 *aa = 0.0; 985 } 986 aa++; 987 aj++; 988 } 989 ridx++; 990 } 991 } else { /* do not use compressed row format */ 992 m = l->B->rmap->n; 993 for (i=0; i<m; i++) { 994 n = ii[i+1] - ii[i]; 995 aj = aij->j + ii[i]; 996 aa = aij->a + ii[i]; 997 for (j=0; j<n; j++) { 998 if (PetscAbsScalar(mask[*aj])) { 999 if (b) bb[i] -= *aa*xx[*aj]; 1000 *aa = 0.0; 1001 } 1002 aa++; 1003 aj++; 1004 } 1005 } 1006 } 1007 if (x) { 1008 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1009 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1010 } 1011 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1012 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1013 ierr = PetscFree(lrows);CHKERRQ(ierr); 1014 1015 /* only change matrix nonzero state if pattern was allowed to be changed */ 1016 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1017 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1018 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1019 } 1020 PetscFunctionReturn(0); 1021 } 1022 1023 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1024 { 1025 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1026 PetscErrorCode ierr; 1027 PetscInt nt; 1028 VecScatter Mvctx = a->Mvctx; 1029 1030 PetscFunctionBegin; 1031 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1032 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1033 1034 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1035 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1036 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1037 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1038 PetscFunctionReturn(0); 1039 } 1040 1041 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1042 { 1043 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1044 PetscErrorCode ierr; 1045 1046 PetscFunctionBegin; 1047 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1048 PetscFunctionReturn(0); 1049 } 1050 1051 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1052 { 1053 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1054 PetscErrorCode ierr; 1055 VecScatter Mvctx = a->Mvctx; 1056 1057 PetscFunctionBegin; 1058 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1059 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1060 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1061 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1062 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1067 { 1068 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1069 PetscErrorCode ierr; 1070 PetscBool merged; 1071 1072 PetscFunctionBegin; 1073 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1074 /* do nondiagonal part */ 1075 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1076 if (!merged) { 1077 /* send it on its way */ 1078 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1079 /* do 
local part */ 1080 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1081 /* receive remote parts: note this assumes the values are not actually */ 1082 /* added in yy until the next line, */ 1083 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1084 } else { 1085 /* do local part */ 1086 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1087 /* send it on its way */ 1088 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1089 /* values actually were received in the Begin() but we need to call this nop */ 1090 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1091 } 1092 PetscFunctionReturn(0); 1093 } 1094 1095 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1096 { 1097 MPI_Comm comm; 1098 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1099 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1100 IS Me,Notme; 1101 PetscErrorCode ierr; 1102 PetscInt M,N,first,last,*notme,i; 1103 PetscMPIInt size; 1104 1105 PetscFunctionBegin; 1106 /* Easy test: symmetric diagonal block */ 1107 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1108 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1109 if (!*f) PetscFunctionReturn(0); 1110 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1111 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1112 if (size == 1) PetscFunctionReturn(0); 1113 1114 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1115 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1116 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1117 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1118 for (i=0; i<first; i++) notme[i] = i; 1119 for (i=last; i<M; i++) notme[i-last+first] = i; 1120 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1121 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1122 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1123 Aoff = Aoffs[0]; 1124 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1125 Boff = Boffs[0]; 1126 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1127 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1128 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1129 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1130 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1131 ierr = PetscFree(notme);CHKERRQ(ierr); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1136 { 1137 PetscErrorCode ierr; 1138 1139 PetscFunctionBegin; 1140 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1145 { 1146 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1147 PetscErrorCode ierr; 1148 1149 PetscFunctionBegin; 1150 /* do nondiagonal part */ 1151 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1152 /* send it on its way */ 1153 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1154 /* do local part */ 1155 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1156 /* receive remote parts */ 1157 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1158 PetscFunctionReturn(0); 1159 } 1160 1161 /* 1162 This only works correctly for square matrices where the 
subblock A->A is the 1163 diagonal block 1164 */ 1165 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1166 { 1167 PetscErrorCode ierr; 1168 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1169 1170 PetscFunctionBegin; 1171 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1172 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1173 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1174 PetscFunctionReturn(0); 1175 } 1176 1177 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1178 { 1179 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1180 PetscErrorCode ierr; 1181 1182 PetscFunctionBegin; 1183 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1184 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1189 { 1190 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1191 PetscErrorCode ierr; 1192 1193 PetscFunctionBegin; 1194 #if defined(PETSC_USE_LOG) 1195 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1196 #endif 1197 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1198 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1199 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1200 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1201 #if defined(PETSC_USE_CTABLE) 1202 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1203 #else 1204 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1205 #endif 1206 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1207 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1208 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1209 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1210 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1211 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1212 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1213 1214 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1223 #if defined(PETSC_HAVE_ELEMENTAL) 1224 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1225 #endif 1226 #if defined(PETSC_HAVE_HYPRE) 1227 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1229 #endif 1230 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 
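/*
    MatView_MPIAIJ_Binary - writes the parallel matrix to a PETSc binary viewer: process 0 writes the header
  (class id, global row and column counts, total number of nonzeros), then the row lengths, global column
  indices, and numerical values of every process are funneled to process 0 and written one process at a time,
  using the viewer flow control to bound the number of outstanding messages.
*/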
1235 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1236 { 1237 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1238 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1239 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1240 PetscErrorCode ierr; 1241 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1242 int fd; 1243 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1244 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1245 PetscScalar *column_values; 1246 PetscInt message_count,flowcontrolcount; 1247 FILE *file; 1248 1249 PetscFunctionBegin; 1250 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1251 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1252 nz = A->nz + B->nz; 1253 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1254 if (!rank) { 1255 header[0] = MAT_FILE_CLASSID; 1256 header[1] = mat->rmap->N; 1257 header[2] = mat->cmap->N; 1258 1259 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1260 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1261 /* get largest number of rows any processor has */ 1262 rlen = mat->rmap->n; 1263 range = mat->rmap->range; 1264 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1265 } else { 1266 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1267 rlen = mat->rmap->n; 1268 } 1269 1270 /* load up the local row counts */ 1271 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1272 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1273 1274 /* store the row lengths to the file */ 1275 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1276 if (!rank) { 1277 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1278 for (i=1; i<size; i++) { 1279 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1280 rlen = range[i+1] - range[i]; 1281 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1282 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1283 } 1284 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1285 } else { 1286 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1287 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1288 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1289 } 1290 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1291 1292 /* load up the local column indices */ 1293 nzmax = nz; /* th processor needs space a largest processor needs */ 1294 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1295 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1296 cnt = 0; 1297 for (i=0; i<mat->rmap->n; i++) { 1298 for (j=B->i[i]; j<B->i[i+1]; j++) { 1299 if ((col = garray[B->j[j]]) > cstart) break; 1300 column_indices[cnt++] = col; 1301 } 1302 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1303 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1304 } 1305 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1306 1307 /* 
store the column indices to the file */ 1308 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1309 if (!rank) { 1310 MPI_Status status; 1311 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1312 for (i=1; i<size; i++) { 1313 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1314 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1315 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1316 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1317 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1318 } 1319 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1320 } else { 1321 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1322 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1323 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1324 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1325 } 1326 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1327 1328 /* load up the local column values */ 1329 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1330 cnt = 0; 1331 for (i=0; i<mat->rmap->n; i++) { 1332 for (j=B->i[i]; j<B->i[i+1]; j++) { 1333 if (garray[B->j[j]] > cstart) break; 1334 column_values[cnt++] = B->a[j]; 1335 } 1336 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1337 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1338 } 1339 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1340 1341 /* store the column values to the file */ 1342 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1343 if (!rank) { 1344 MPI_Status status; 1345 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1346 for (i=1; i<size; i++) { 1347 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1348 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1349 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1350 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1351 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1352 } 1353 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1354 } else { 1355 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1356 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1357 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1358 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1359 } 1360 ierr = PetscFree(column_values);CHKERRQ(ierr); 1361 1362 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1363 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #include <petscdraw.h> 1368 PetscErrorCode 
MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1369 { 1370 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1371 PetscErrorCode ierr; 1372 PetscMPIInt rank = aij->rank,size = aij->size; 1373 PetscBool isdraw,iascii,isbinary; 1374 PetscViewer sviewer; 1375 PetscViewerFormat format; 1376 1377 PetscFunctionBegin; 1378 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1379 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1380 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1381 if (iascii) { 1382 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1383 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1384 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1385 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1386 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1387 for (i=0; i<(PetscInt)size; i++) { 1388 nmax = PetscMax(nmax,nz[i]); 1389 nmin = PetscMin(nmin,nz[i]); 1390 navg += nz[i]; 1391 } 1392 ierr = PetscFree(nz);CHKERRQ(ierr); 1393 navg = navg/size; 1394 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1395 PetscFunctionReturn(0); 1396 } 1397 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1398 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1399 MatInfo info; 1400 PetscBool inodes; 1401 1402 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1403 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1404 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1405 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1406 if (!inodes) { 1407 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1408 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1409 } else { 1410 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1411 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1412 } 1413 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1414 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1415 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1416 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1417 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1418 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1420 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1421 PetscFunctionReturn(0); 1422 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1423 PetscInt inodecount,inodelimit,*inodes; 1424 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1425 if (inodes) { 1426 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1427 } else { 1428 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) 
routines\n");CHKERRQ(ierr); 1429 } 1430 PetscFunctionReturn(0); 1431 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1432 PetscFunctionReturn(0); 1433 } 1434 } else if (isbinary) { 1435 if (size == 1) { 1436 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1437 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1438 } else { 1439 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1440 } 1441 PetscFunctionReturn(0); 1442 } else if (isdraw) { 1443 PetscDraw draw; 1444 PetscBool isnull; 1445 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1446 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1447 if (isnull) PetscFunctionReturn(0); 1448 } 1449 1450 { 1451 /* assemble the entire matrix onto first processor. */ 1452 Mat A; 1453 Mat_SeqAIJ *Aloc; 1454 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1455 MatScalar *a; 1456 1457 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1458 if (!rank) { 1459 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1460 } else { 1461 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1462 } 1463 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1464 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1465 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1466 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1467 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1468 1469 /* copy over the A part */ 1470 Aloc = (Mat_SeqAIJ*)aij->A->data; 1471 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1472 row = mat->rmap->rstart; 1473 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1474 for (i=0; i<m; i++) { 1475 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1476 row++; 1477 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1478 } 1479 aj = Aloc->j; 1480 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1481 1482 /* copy over the B part */ 1483 Aloc = (Mat_SeqAIJ*)aij->B->data; 1484 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1485 row = mat->rmap->rstart; 1486 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1487 ct = cols; 1488 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1489 for (i=0; i<m; i++) { 1490 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1491 row++; 1492 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1493 } 1494 ierr = PetscFree(ct);CHKERRQ(ierr); 1495 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1496 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1497 /* 1498 Everyone has to call to draw the matrix since the graphics waits are 1499 synchronized across all processors that share the PetscDraw object 1500 */ 1501 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1502 if (!rank) { 1503 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1504 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1505 } 1506 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1507 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1508 ierr = MatDestroy(&A);CHKERRQ(ierr); 1509 } 1510 PetscFunctionReturn(0); 1511 } 1512 1513 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1514 { 1515 PetscErrorCode ierr; 1516 PetscBool iascii,isdraw,issocket,isbinary; 1517 1518 PetscFunctionBegin; 1519 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1520 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1521 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1522 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1523 if (iascii || isdraw || isbinary || issocket) { 1524 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1525 } 1526 PetscFunctionReturn(0); 1527 } 1528 1529 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1530 { 1531 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1532 PetscErrorCode ierr; 1533 Vec bb1 = 0; 1534 PetscBool hasop; 1535 1536 PetscFunctionBegin; 1537 if (flag == SOR_APPLY_UPPER) { 1538 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1539 PetscFunctionReturn(0); 1540 } 1541 1542 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1543 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1544 } 1545 1546 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1547 if (flag & SOR_ZERO_INITIAL_GUESS) { 1548 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1549 its--; 1550 } 1551 1552 while (its--) { 1553 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1554 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1555 1556 /* update rhs: bb1 = bb - B*x */ 1557 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1558 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1559 1560 /* local sweep */ 1561 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1562 } 1563 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1564 if (flag & SOR_ZERO_INITIAL_GUESS) { 1565 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1566 its--; 1567 } 1568 while (its--) { 1569 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1570 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1571 1572 /* update rhs: bb1 = bb - B*x */ 1573 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1574 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1575 1576 /* local sweep */ 1577 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1578 } 1579 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1580 if (flag & SOR_ZERO_INITIAL_GUESS) { 1581 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1582 its--; 1583 } 1584 while (its--) { 1585 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1586 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1587 1588 /* update rhs: bb1 = bb - B*x */ 1589 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1590 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1591 1592 /* local sweep */ 1593 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1594 } 1595 } else if (flag & SOR_EISENSTAT) { 1596 Vec xx1; 1597 1598 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1599 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1600 1601 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1602 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1603 if (!mat->diag) { 1604 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1605 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1606 } 1607 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1608 if (hasop) { 1609 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1610 } else { 1611 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1612 } 1613 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1614 1615 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1616 1617 /* local sweep */ 1618 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1619 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1620 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1621 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1622 1623 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1624 1625 matin->factorerrortype = mat->A->factorerrortype; 1626 PetscFunctionReturn(0); 1627 } 1628 1629 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1630 { 1631 Mat aA,aB,Aperm; 1632 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1633 PetscScalar *aa,*ba; 1634 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1635 PetscSF rowsf,sf; 1636 IS parcolp = NULL; 1637 PetscBool done; 1638 PetscErrorCode ierr; 1639 1640 PetscFunctionBegin; 1641 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1642 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1643 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1644 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1645 1646 /* Invert row permutation to find out where my rows should go */ 1647 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1648 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1649 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1650 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1651 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1652 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1653 1654 /* Invert column permutation to find out where my columns should go */ 1655 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1656 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1657 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1658 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1659 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1660 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1661 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1662 1663 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1664 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1665 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1666 1667 /* Find out where my gcols should go */ 1668 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1669 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1670 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1671 ierr = 
PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1672 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1673 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1674 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1675 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1676 1677 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1678 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1679 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1680 for (i=0; i<m; i++) { 1681 PetscInt row = rdest[i],rowner; 1682 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1683 for (j=ai[i]; j<ai[i+1]; j++) { 1684 PetscInt cowner,col = cdest[aj[j]]; 1685 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1686 if (rowner == cowner) dnnz[i]++; 1687 else onnz[i]++; 1688 } 1689 for (j=bi[i]; j<bi[i+1]; j++) { 1690 PetscInt cowner,col = gcdest[bj[j]]; 1691 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1692 if (rowner == cowner) dnnz[i]++; 1693 else onnz[i]++; 1694 } 1695 } 1696 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1697 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1698 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1699 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1700 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1701 1702 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1703 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1704 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1705 for (i=0; i<m; i++) { 1706 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1707 PetscInt j0,rowlen; 1708 rowlen = ai[i+1] - ai[i]; 1709 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1710 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1711 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1712 } 1713 rowlen = bi[i+1] - bi[i]; 1714 for (j0=j=0; j<rowlen; j0=j) { 1715 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1716 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1717 } 1718 } 1719 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1720 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1721 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1722 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1723 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1724 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1725 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1726 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1727 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1728 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1729 *B = Aperm; 1730 PetscFunctionReturn(0); 1731 } 1732 1733 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1734 { 1735 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1736 PetscErrorCode ierr; 1737 1738 PetscFunctionBegin; 1739 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1740 if (ghosts) *ghosts = aij->garray; 1741 PetscFunctionReturn(0); 1742 } 
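/*
   Illustrative sketch, not part of the implementation: MatGetGhosts_MPIAIJ() above is
   normally reached through the public MatGetGhosts() interface and exposes the garray
   of an assembled MATMPIAIJ matrix, i.e. the global column numbers that back the
   compressed columns of the local off-diagonal block aij->B.  The matrix A below is
   assumed to be an already assembled MATMPIAIJ matrix; the array returned in ghosts
   is owned by the matrix and must not be freed by the caller.

     PetscErrorCode  ierr;
     PetscInt        nghosts,i;
     const PetscInt  *ghosts;

     ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
     for (i=0; i<nghosts; i++) {
       ierr = PetscPrintf(PETSC_COMM_SELF,"ghost column %D -> global column %D\n",i,ghosts[i]);CHKERRQ(ierr);
     }
*/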
1743 1744 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1745 { 1746 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1747 Mat A = mat->A,B = mat->B; 1748 PetscErrorCode ierr; 1749 PetscReal isend[5],irecv[5]; 1750 1751 PetscFunctionBegin; 1752 info->block_size = 1.0; 1753 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1754 1755 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1756 isend[3] = info->memory; isend[4] = info->mallocs; 1757 1758 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1759 1760 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1761 isend[3] += info->memory; isend[4] += info->mallocs; 1762 if (flag == MAT_LOCAL) { 1763 info->nz_used = isend[0]; 1764 info->nz_allocated = isend[1]; 1765 info->nz_unneeded = isend[2]; 1766 info->memory = isend[3]; 1767 info->mallocs = isend[4]; 1768 } else if (flag == MAT_GLOBAL_MAX) { 1769 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1770 1771 info->nz_used = irecv[0]; 1772 info->nz_allocated = irecv[1]; 1773 info->nz_unneeded = irecv[2]; 1774 info->memory = irecv[3]; 1775 info->mallocs = irecv[4]; 1776 } else if (flag == MAT_GLOBAL_SUM) { 1777 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1778 1779 info->nz_used = irecv[0]; 1780 info->nz_allocated = irecv[1]; 1781 info->nz_unneeded = irecv[2]; 1782 info->memory = irecv[3]; 1783 info->mallocs = irecv[4]; 1784 } 1785 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1786 info->fill_ratio_needed = 0; 1787 info->factor_mallocs = 0; 1788 PetscFunctionReturn(0); 1789 } 1790 1791 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1792 { 1793 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1794 PetscErrorCode ierr; 1795 1796 PetscFunctionBegin; 1797 switch (op) { 1798 case MAT_NEW_NONZERO_LOCATIONS: 1799 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1800 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1801 case MAT_KEEP_NONZERO_PATTERN: 1802 case MAT_NEW_NONZERO_LOCATION_ERR: 1803 case MAT_USE_INODES: 1804 case MAT_IGNORE_ZERO_ENTRIES: 1805 MatCheckPreallocated(A,1); 1806 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1807 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1808 break; 1809 case MAT_ROW_ORIENTED: 1810 MatCheckPreallocated(A,1); 1811 a->roworiented = flg; 1812 1813 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1814 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1815 break; 1816 case MAT_NEW_DIAGONALS: 1817 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1818 break; 1819 case MAT_IGNORE_OFF_PROC_ENTRIES: 1820 a->donotstash = flg; 1821 break; 1822 case MAT_SPD: 1823 A->spd_set = PETSC_TRUE; 1824 A->spd = flg; 1825 if (flg) { 1826 A->symmetric = PETSC_TRUE; 1827 A->structurally_symmetric = PETSC_TRUE; 1828 A->symmetric_set = PETSC_TRUE; 1829 A->structurally_symmetric_set = PETSC_TRUE; 1830 } 1831 break; 1832 case MAT_SYMMETRIC: 1833 MatCheckPreallocated(A,1); 1834 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1835 break; 1836 case MAT_STRUCTURALLY_SYMMETRIC: 1837 MatCheckPreallocated(A,1); 1838 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1839 break; 1840 case MAT_HERMITIAN: 1841 MatCheckPreallocated(A,1); 1842 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1843 break; 1844 case MAT_SYMMETRY_ETERNAL: 1845 MatCheckPreallocated(A,1); 1846 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1847 break; 1848 case MAT_SUBMAT_SINGLEIS: 
1849 A->submat_singleis = flg; 1850 break; 1851 case MAT_STRUCTURE_ONLY: 1852 /* The option is handled directly by MatSetOption() */ 1853 break; 1854 default: 1855 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1856 } 1857 PetscFunctionReturn(0); 1858 } 1859 1860 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1861 { 1862 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1863 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1864 PetscErrorCode ierr; 1865 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1866 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1867 PetscInt *cmap,*idx_p; 1868 1869 PetscFunctionBegin; 1870 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1871 mat->getrowactive = PETSC_TRUE; 1872 1873 if (!mat->rowvalues && (idx || v)) { 1874 /* 1875 allocate enough space to hold information from the longest row. 1876 */ 1877 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1878 PetscInt max = 1,tmp; 1879 for (i=0; i<matin->rmap->n; i++) { 1880 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1881 if (max < tmp) max = tmp; 1882 } 1883 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1884 } 1885 1886 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1887 lrow = row - rstart; 1888 1889 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1890 if (!v) {pvA = 0; pvB = 0;} 1891 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1892 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1893 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1894 nztot = nzA + nzB; 1895 1896 cmap = mat->garray; 1897 if (v || idx) { 1898 if (nztot) { 1899 /* Sort by increasing column numbers, assuming A and B already sorted */ 1900 PetscInt imark = -1; 1901 if (v) { 1902 *v = v_p = mat->rowvalues; 1903 for (i=0; i<nzB; i++) { 1904 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1905 else break; 1906 } 1907 imark = i; 1908 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1909 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1910 } 1911 if (idx) { 1912 *idx = idx_p = mat->rowindices; 1913 if (imark > -1) { 1914 for (i=0; i<imark; i++) { 1915 idx_p[i] = cmap[cworkB[i]]; 1916 } 1917 } else { 1918 for (i=0; i<nzB; i++) { 1919 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1920 else break; 1921 } 1922 imark = i; 1923 } 1924 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1925 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1926 } 1927 } else { 1928 if (idx) *idx = 0; 1929 if (v) *v = 0; 1930 } 1931 } 1932 *nz = nztot; 1933 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1934 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1935 PetscFunctionReturn(0); 1936 } 1937 1938 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1939 { 1940 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1941 1942 PetscFunctionBegin; 1943 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1944 aij->getrowactive = PETSC_FALSE; 1945 PetscFunctionReturn(0); 1946 } 1947 1948 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1949 { 1950 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1951 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = 
(Mat_SeqAIJ*)aij->B->data; 1952 PetscErrorCode ierr; 1953 PetscInt i,j,cstart = mat->cmap->rstart; 1954 PetscReal sum = 0.0; 1955 MatScalar *v; 1956 1957 PetscFunctionBegin; 1958 if (aij->size == 1) { 1959 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1960 } else { 1961 if (type == NORM_FROBENIUS) { 1962 v = amat->a; 1963 for (i=0; i<amat->nz; i++) { 1964 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1965 } 1966 v = bmat->a; 1967 for (i=0; i<bmat->nz; i++) { 1968 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1969 } 1970 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1971 *norm = PetscSqrtReal(*norm); 1972 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1973 } else if (type == NORM_1) { /* max column norm */ 1974 PetscReal *tmp,*tmp2; 1975 PetscInt *jj,*garray = aij->garray; 1976 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1977 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1978 *norm = 0.0; 1979 v = amat->a; jj = amat->j; 1980 for (j=0; j<amat->nz; j++) { 1981 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1982 } 1983 v = bmat->a; jj = bmat->j; 1984 for (j=0; j<bmat->nz; j++) { 1985 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1986 } 1987 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1988 for (j=0; j<mat->cmap->N; j++) { 1989 if (tmp2[j] > *norm) *norm = tmp2[j]; 1990 } 1991 ierr = PetscFree(tmp);CHKERRQ(ierr); 1992 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1993 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1994 } else if (type == NORM_INFINITY) { /* max row norm */ 1995 PetscReal ntemp = 0.0; 1996 for (j=0; j<aij->A->rmap->n; j++) { 1997 v = amat->a + amat->i[j]; 1998 sum = 0.0; 1999 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2000 sum += PetscAbsScalar(*v); v++; 2001 } 2002 v = bmat->a + bmat->i[j]; 2003 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2004 sum += PetscAbsScalar(*v); v++; 2005 } 2006 if (sum > ntemp) ntemp = sum; 2007 } 2008 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2009 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2010 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2011 } 2012 PetscFunctionReturn(0); 2013 } 2014 2015 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2016 { 2017 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2018 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2019 PetscErrorCode ierr; 2020 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2021 PetscInt cstart = A->cmap->rstart,ncol; 2022 Mat B; 2023 MatScalar *array; 2024 2025 PetscFunctionBegin; 2026 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2027 ai = Aloc->i; aj = Aloc->j; 2028 bi = Bloc->i; bj = Bloc->j; 2029 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2030 PetscInt *d_nnz,*g_nnz,*o_nnz; 2031 PetscSFNode *oloc; 2032 PETSC_UNUSED PetscSF sf; 2033 2034 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2035 /* compute d_nnz for preallocation */ 2036 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2037 for (i=0; i<ai[ma]; i++) { 2038 d_nnz[aj[i]]++; 2039 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2040 } 2041 /* compute local off-diagonal contributions */ 2042 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2043 
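    /* Each nonzero stored in local (compressed) column bj[i] of the off-diagonal block B
       becomes a nonzero in global row garray[bj[i]] of the transpose.  Count these
       contributions per ghost column in g_nnz, then reduce them below through a star
       forest over the column layout so that each owning process accumulates them into
       o_nnz, its off-diagonal row counts for preallocating the transposed matrix. */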
for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2044 /* map those to global */ 2045 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2046 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2047 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2048 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2049 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2050 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2051 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2052 2053 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2054 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2055 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2056 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2057 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2058 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2059 } else { 2060 B = *matout; 2061 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2062 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2063 } 2064 2065 /* copy over the A part */ 2066 array = Aloc->a; 2067 row = A->rmap->rstart; 2068 for (i=0; i<ma; i++) { 2069 ncol = ai[i+1]-ai[i]; 2070 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2071 row++; 2072 array += ncol; aj += ncol; 2073 } 2074 aj = Aloc->j; 2075 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2076 2077 /* copy over the B part */ 2078 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2079 array = Bloc->a; 2080 row = A->rmap->rstart; 2081 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2082 cols_tmp = cols; 2083 for (i=0; i<mb; i++) { 2084 ncol = bi[i+1]-bi[i]; 2085 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2086 row++; 2087 array += ncol; cols_tmp += ncol; 2088 } 2089 ierr = PetscFree(cols);CHKERRQ(ierr); 2090 2091 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2092 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2093 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2094 *matout = B; 2095 } else { 2096 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2097 } 2098 PetscFunctionReturn(0); 2099 } 2100 2101 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2102 { 2103 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2104 Mat a = aij->A,b = aij->B; 2105 PetscErrorCode ierr; 2106 PetscInt s1,s2,s3; 2107 2108 PetscFunctionBegin; 2109 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2110 if (rr) { 2111 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2112 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2113 /* Overlap communication with computation. 
*/ 2114 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2115 } 2116 if (ll) { 2117 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2118 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2119 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2120 } 2121 /* scale the diagonal block */ 2122 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2123 2124 if (rr) { 2125 /* Do a scatter end and then right scale the off-diagonal block */ 2126 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2127 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2128 } 2129 PetscFunctionReturn(0); 2130 } 2131 2132 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2133 { 2134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2135 PetscErrorCode ierr; 2136 2137 PetscFunctionBegin; 2138 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2139 PetscFunctionReturn(0); 2140 } 2141 2142 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2143 { 2144 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2145 Mat a,b,c,d; 2146 PetscBool flg; 2147 PetscErrorCode ierr; 2148 2149 PetscFunctionBegin; 2150 a = matA->A; b = matA->B; 2151 c = matB->A; d = matB->B; 2152 2153 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2154 if (flg) { 2155 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2156 } 2157 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2158 PetscFunctionReturn(0); 2159 } 2160 2161 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2162 { 2163 PetscErrorCode ierr; 2164 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2165 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2166 2167 PetscFunctionBegin; 2168 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2169 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2170 /* because of the column compression in the off-processor part of the matrix a->B, 2171 the number of columns in a->B and b->B may be different, hence we cannot call 2172 the MatCopy() directly on the two parts. If need be, we can provide a more 2173 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2174 then copying the submatrices */ 2175 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2176 } else { 2177 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2178 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2179 } 2180 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2181 PetscFunctionReturn(0); 2182 } 2183 2184 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2185 { 2186 PetscErrorCode ierr; 2187 2188 PetscFunctionBegin; 2189 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2190 PetscFunctionReturn(0); 2191 } 2192 2193 /* 2194 Computes the number of nonzeros per row needed for preallocation when X and Y 2195 have different nonzero structure. 
2196 */ 2197 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2198 { 2199 PetscInt i,j,k,nzx,nzy; 2200 2201 PetscFunctionBegin; 2202 /* Set the number of nonzeros in the new matrix */ 2203 for (i=0; i<m; i++) { 2204 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2205 nzx = xi[i+1] - xi[i]; 2206 nzy = yi[i+1] - yi[i]; 2207 nnz[i] = 0; 2208 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2209 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2210 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2211 nnz[i]++; 2212 } 2213 for (; k<nzy; k++) nnz[i]++; 2214 } 2215 PetscFunctionReturn(0); 2216 } 2217 2218 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2219 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2220 { 2221 PetscErrorCode ierr; 2222 PetscInt m = Y->rmap->N; 2223 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2224 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2225 2226 PetscFunctionBegin; 2227 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2228 PetscFunctionReturn(0); 2229 } 2230 2231 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2232 { 2233 PetscErrorCode ierr; 2234 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2235 PetscBLASInt bnz,one=1; 2236 Mat_SeqAIJ *x,*y; 2237 2238 PetscFunctionBegin; 2239 if (str == SAME_NONZERO_PATTERN) { 2240 PetscScalar alpha = a; 2241 x = (Mat_SeqAIJ*)xx->A->data; 2242 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2243 y = (Mat_SeqAIJ*)yy->A->data; 2244 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2245 x = (Mat_SeqAIJ*)xx->B->data; 2246 y = (Mat_SeqAIJ*)yy->B->data; 2247 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2248 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2249 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2250 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2251 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2252 } else { 2253 Mat B; 2254 PetscInt *nnz_d,*nnz_o; 2255 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2256 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2257 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2258 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2259 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2260 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2261 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2262 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2263 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2264 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2265 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2266 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2267 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2268 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2269 } 2270 PetscFunctionReturn(0); 2271 } 2272 2273 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2274 2275 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2276 { 2277 #if defined(PETSC_USE_COMPLEX) 2278 PetscErrorCode ierr; 2279 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2280 2281 PetscFunctionBegin; 2282 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2283 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2284 #else 2285 PetscFunctionBegin; 2286 #endif 2287 PetscFunctionReturn(0); 2288 } 2289 2290 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2291 { 2292 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2293 PetscErrorCode ierr; 2294 2295 PetscFunctionBegin; 2296 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2297 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2298 PetscFunctionReturn(0); 2299 } 2300 2301 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2302 { 2303 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2304 PetscErrorCode ierr; 2305 2306 PetscFunctionBegin; 2307 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2308 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2309 PetscFunctionReturn(0); 2310 } 2311 2312 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2313 { 2314 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2315 PetscErrorCode ierr; 2316 PetscInt i,*idxb = 0; 2317 PetscScalar *va,*vb; 2318 Vec vtmp; 2319 2320 PetscFunctionBegin; 2321 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2322 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2323 if (idx) { 2324 for (i=0; i<A->rmap->n; i++) { 2325 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2326 } 2327 } 2328 2329 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2330 if (idx) { 2331 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2332 } 2333 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2334 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2335 2336 for (i=0; i<A->rmap->n; i++) { 2337 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2338 va[i] = vb[i]; 2339 if (idx) idx[i] = a->garray[idxb[i]]; 2340 } 2341 } 2342 2343 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2344 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2345 ierr = PetscFree(idxb);CHKERRQ(ierr); 2346 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2347 PetscFunctionReturn(0); 2348 } 2349 2350 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2351 { 2352 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2353 PetscErrorCode ierr; 2354 PetscInt i,*idxb = 0; 2355 PetscScalar *va,*vb; 2356 Vec vtmp; 2357 2358 PetscFunctionBegin; 2359 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2360 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2361 if (idx) { 2362 for (i=0; i<A->cmap->n; i++) { 2363 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2364 } 2365 } 2366 2367 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2368 if (idx) { 2369 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2370 } 2371 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2372 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2373 2374 for (i=0; i<A->rmap->n; i++) { 2375 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2376 va[i] = vb[i]; 2377 if (idx) idx[i] = a->garray[idxb[i]]; 2378 } 2379 } 2380 2381 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2382 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2383 ierr = PetscFree(idxb);CHKERRQ(ierr); 2384 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2385 PetscFunctionReturn(0); 2386 } 2387 2388 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2389 { 2390 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2391 PetscInt n = A->rmap->n; 2392 PetscInt cstart = A->cmap->rstart; 2393 PetscInt *cmap = mat->garray; 2394 PetscInt *diagIdx, *offdiagIdx; 2395 Vec diagV, offdiagV; 2396 PetscScalar *a, *diagA, *offdiagA; 2397 PetscInt r; 2398 PetscErrorCode ierr; 2399 2400 PetscFunctionBegin; 2401 
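  /* Compute the row-wise minima of the diagonal block A and of the off-diagonal block B
     separately into two sequential work vectors, then merge them row by row below and
     translate the winning local column index into a global one: cstart + diagIdx[r] for
     the diagonal block, cmap[offdiagIdx[r]] (i.e. garray) for the off-diagonal block. */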
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2402 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2403 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2404 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2405 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2406 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2407 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2408 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2409 for (r = 0; r < n; ++r) { 2410 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2411 a[r] = diagA[r]; 2412 idx[r] = cstart + diagIdx[r]; 2413 } else { 2414 a[r] = offdiagA[r]; 2415 idx[r] = cmap[offdiagIdx[r]]; 2416 } 2417 } 2418 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2419 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2420 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2421 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2422 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2423 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2424 PetscFunctionReturn(0); 2425 } 2426 2427 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2428 { 2429 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2430 PetscInt n = A->rmap->n; 2431 PetscInt cstart = A->cmap->rstart; 2432 PetscInt *cmap = mat->garray; 2433 PetscInt *diagIdx, *offdiagIdx; 2434 Vec diagV, offdiagV; 2435 PetscScalar *a, *diagA, *offdiagA; 2436 PetscInt r; 2437 PetscErrorCode ierr; 2438 2439 PetscFunctionBegin; 2440 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2441 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2442 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2443 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2444 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2445 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2446 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2447 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2448 for (r = 0; r < n; ++r) { 2449 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2450 a[r] = diagA[r]; 2451 idx[r] = cstart + diagIdx[r]; 2452 } else { 2453 a[r] = offdiagA[r]; 2454 idx[r] = cmap[offdiagIdx[r]]; 2455 } 2456 } 2457 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2458 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2459 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2460 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2461 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2462 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2463 PetscFunctionReturn(0); 2464 } 2465 2466 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2467 { 2468 PetscErrorCode ierr; 2469 Mat *dummy; 2470 2471 PetscFunctionBegin; 2472 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2473 *newmat = *dummy; 2474 ierr = PetscFree(dummy);CHKERRQ(ierr); 2475 PetscFunctionReturn(0); 2476 } 2477 2478 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2479 { 2480 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2481 PetscErrorCode ierr; 2482 2483 PetscFunctionBegin; 2484 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2485 A->factorerrortype = a->A->factorerrortype; 2486 PetscFunctionReturn(0); 2487 } 2488 2489 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2490 { 2491 PetscErrorCode ierr; 2492 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2493 2494 PetscFunctionBegin; 2495 
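  /* Fill the sequential diagonal and off-diagonal blocks independently from the same
     random context, then run a final assembly so the parallel matrix is left in a
     consistent assembled state. */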
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2496 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2497 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2498 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2499 PetscFunctionReturn(0); 2500 } 2501 2502 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2503 { 2504 PetscFunctionBegin; 2505 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2506 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2507 PetscFunctionReturn(0); 2508 } 2509 2510 /*@ 2511 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2512 2513 Collective on Mat 2514 2515 Input Parameters: 2516 + A - the matrix 2517 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2518 2519 Level: advanced 2520 2521 @*/ 2522 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2523 { 2524 PetscErrorCode ierr; 2525 2526 PetscFunctionBegin; 2527 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2528 PetscFunctionReturn(0); 2529 } 2530 2531 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2532 { 2533 PetscErrorCode ierr; 2534 PetscBool sc = PETSC_FALSE,flg; 2535 2536 PetscFunctionBegin; 2537 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2538 ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr); 2539 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2540 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2541 if (flg) { 2542 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2543 } 2544 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2545 PetscFunctionReturn(0); 2546 } 2547 2548 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2549 { 2550 PetscErrorCode ierr; 2551 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2552 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2553 2554 PetscFunctionBegin; 2555 if (!Y->preallocated) { 2556 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2557 } else if (!aij->nz) { 2558 PetscInt nonew = aij->nonew; 2559 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2560 aij->nonew = nonew; 2561 } 2562 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2563 PetscFunctionReturn(0); 2564 } 2565 2566 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2567 { 2568 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2569 PetscErrorCode ierr; 2570 2571 PetscFunctionBegin; 2572 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2573 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2574 if (d) { 2575 PetscInt rstart; 2576 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2577 *d += rstart; 2578 2579 } 2580 PetscFunctionReturn(0); 2581 } 2582 2583 2584 /* -------------------------------------------------------------------*/ 2585 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2586 MatGetRow_MPIAIJ, 2587 MatRestoreRow_MPIAIJ, 2588 MatMult_MPIAIJ, 2589 /* 4*/ MatMultAdd_MPIAIJ, 2590 MatMultTranspose_MPIAIJ, 2591 MatMultTransposeAdd_MPIAIJ, 2592 0, 2593 0, 2594 0, 2595 /*10*/ 0, 2596 0, 2597 0, 2598 MatSOR_MPIAIJ, 2599 MatTranspose_MPIAIJ, 2600 /*15*/ MatGetInfo_MPIAIJ, 2601 MatEqual_MPIAIJ, 2602 
MatGetDiagonal_MPIAIJ, 2603 MatDiagonalScale_MPIAIJ, 2604 MatNorm_MPIAIJ, 2605 /*20*/ MatAssemblyBegin_MPIAIJ, 2606 MatAssemblyEnd_MPIAIJ, 2607 MatSetOption_MPIAIJ, 2608 MatZeroEntries_MPIAIJ, 2609 /*24*/ MatZeroRows_MPIAIJ, 2610 0, 2611 0, 2612 0, 2613 0, 2614 /*29*/ MatSetUp_MPIAIJ, 2615 0, 2616 0, 2617 MatGetDiagonalBlock_MPIAIJ, 2618 0, 2619 /*34*/ MatDuplicate_MPIAIJ, 2620 0, 2621 0, 2622 0, 2623 0, 2624 /*39*/ MatAXPY_MPIAIJ, 2625 MatCreateSubMatrices_MPIAIJ, 2626 MatIncreaseOverlap_MPIAIJ, 2627 MatGetValues_MPIAIJ, 2628 MatCopy_MPIAIJ, 2629 /*44*/ MatGetRowMax_MPIAIJ, 2630 MatScale_MPIAIJ, 2631 MatShift_MPIAIJ, 2632 MatDiagonalSet_MPIAIJ, 2633 MatZeroRowsColumns_MPIAIJ, 2634 /*49*/ MatSetRandom_MPIAIJ, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*54*/ MatFDColoringCreate_MPIXAIJ, 2640 0, 2641 MatSetUnfactored_MPIAIJ, 2642 MatPermute_MPIAIJ, 2643 0, 2644 /*59*/ MatCreateSubMatrix_MPIAIJ, 2645 MatDestroy_MPIAIJ, 2646 MatView_MPIAIJ, 2647 0, 2648 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2649 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2650 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2651 0, 2652 0, 2653 0, 2654 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2655 MatGetRowMinAbs_MPIAIJ, 2656 0, 2657 0, 2658 0, 2659 0, 2660 /*75*/ MatFDColoringApply_AIJ, 2661 MatSetFromOptions_MPIAIJ, 2662 0, 2663 0, 2664 MatFindZeroDiagonals_MPIAIJ, 2665 /*80*/ 0, 2666 0, 2667 0, 2668 /*83*/ MatLoad_MPIAIJ, 2669 MatIsSymmetric_MPIAIJ, 2670 0, 2671 0, 2672 0, 2673 0, 2674 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2675 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2676 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2677 MatPtAP_MPIAIJ_MPIAIJ, 2678 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2679 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2680 0, 2681 0, 2682 0, 2683 0, 2684 /*99*/ 0, 2685 0, 2686 0, 2687 MatConjugate_MPIAIJ, 2688 0, 2689 /*104*/MatSetValuesRow_MPIAIJ, 2690 MatRealPart_MPIAIJ, 2691 MatImaginaryPart_MPIAIJ, 2692 0, 2693 0, 2694 /*109*/0, 2695 0, 2696 MatGetRowMin_MPIAIJ, 2697 0, 2698 MatMissingDiagonal_MPIAIJ, 2699 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2700 0, 2701 MatGetGhosts_MPIAIJ, 2702 0, 2703 0, 2704 /*119*/0, 2705 0, 2706 0, 2707 0, 2708 MatGetMultiProcBlock_MPIAIJ, 2709 /*124*/MatFindNonzeroRows_MPIAIJ, 2710 MatGetColumnNorms_MPIAIJ, 2711 MatInvertBlockDiagonal_MPIAIJ, 2712 0, 2713 MatCreateSubMatricesMPI_MPIAIJ, 2714 /*129*/0, 2715 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2716 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2717 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2718 0, 2719 /*134*/0, 2720 0, 2721 MatRARt_MPIAIJ_MPIAIJ, 2722 0, 2723 0, 2724 /*139*/MatSetBlockSizes_MPIAIJ, 2725 0, 2726 0, 2727 MatFDColoringSetUp_MPIXAIJ, 2728 MatFindOffBlockDiagonalEntries_MPIAIJ, 2729 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2730 }; 2731 2732 /* ----------------------------------------------------------------------------------------*/ 2733 2734 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2735 { 2736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2737 PetscErrorCode ierr; 2738 2739 PetscFunctionBegin; 2740 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2741 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2742 PetscFunctionReturn(0); 2743 } 2744 2745 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2746 { 2747 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2748 PetscErrorCode ierr; 2749 2750 PetscFunctionBegin; 2751 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2752 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2753 PetscFunctionReturn(0); 2754 } 2755 2756 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt 
o_nnz[]) 2757 { 2758 Mat_MPIAIJ *b; 2759 PetscErrorCode ierr; 2760 2761 PetscFunctionBegin; 2762 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2763 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2764 b = (Mat_MPIAIJ*)B->data; 2765 2766 #if defined(PETSC_USE_CTABLE) 2767 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2768 #else 2769 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2770 #endif 2771 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2772 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2773 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2774 2775 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2776 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2777 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2778 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2779 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2780 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2781 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2782 2783 if (!B->preallocated) { 2784 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2785 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2786 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2787 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2788 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2789 } 2790 2791 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2792 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2793 B->preallocated = PETSC_TRUE; 2794 B->was_assembled = PETSC_FALSE; 2795 B->assembled = PETSC_FALSE;; 2796 PetscFunctionReturn(0); 2797 } 2798 2799 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2800 { 2801 Mat_MPIAIJ *b; 2802 PetscErrorCode ierr; 2803 2804 PetscFunctionBegin; 2805 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2806 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2807 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2808 b = (Mat_MPIAIJ*)B->data; 2809 2810 #if defined(PETSC_USE_CTABLE) 2811 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2812 #else 2813 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2814 #endif 2815 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2816 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2817 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2818 2819 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2820 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2821 B->preallocated = PETSC_TRUE; 2822 B->was_assembled = PETSC_FALSE; 2823 B->assembled = PETSC_FALSE; 2824 PetscFunctionReturn(0); 2825 } 2826 2827 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2828 { 2829 Mat mat; 2830 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2831 PetscErrorCode ierr; 2832 2833 PetscFunctionBegin; 2834 *newmat = 0; 2835 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2836 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2837 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2838 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2839 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2840 a = (Mat_MPIAIJ*)mat->data; 2841 2842 mat->factortype = matin->factortype; 2843 mat->assembled = PETSC_TRUE; 2844 mat->insertmode = NOT_SET_VALUES; 2845 mat->preallocated = PETSC_TRUE; 2846 2847 a->size = oldmat->size; 2848 a->rank = oldmat->rank; 2849 a->donotstash 
= oldmat->donotstash; 2850 a->roworiented = oldmat->roworiented; 2851 a->rowindices = 0; 2852 a->rowvalues = 0; 2853 a->getrowactive = PETSC_FALSE; 2854 2855 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2856 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2857 2858 if (oldmat->colmap) { 2859 #if defined(PETSC_USE_CTABLE) 2860 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2861 #else 2862 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2863 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2864 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2865 #endif 2866 } else a->colmap = 0; 2867 if (oldmat->garray) { 2868 PetscInt len; 2869 len = oldmat->B->cmap->n; 2870 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2871 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2872 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2873 } else a->garray = 0; 2874 2875 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2876 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2877 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2878 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2879 2880 if (oldmat->Mvctx_mpi1) { 2881 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2882 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2883 } 2884 2885 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2886 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2887 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2888 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2889 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2890 *newmat = mat; 2891 PetscFunctionReturn(0); 2892 } 2893 2894 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2895 { 2896 PetscScalar *vals,*svals; 2897 MPI_Comm comm; 2898 PetscErrorCode ierr; 2899 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2900 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2901 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2902 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2903 PetscInt cend,cstart,n,*rowners; 2904 int fd; 2905 PetscInt bs = newMat->rmap->bs; 2906 2907 PetscFunctionBegin; 2908 /* force binary viewer to load .info file if it has not yet done so */ 2909 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2910 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2911 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2912 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2913 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2914 if (!rank) { 2915 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2916 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2917 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2918 } 2919 2920 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2921 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the 
matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2922 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2923 if (bs < 0) bs = 1; 2924 2925 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2926 M = header[1]; N = header[2]; 2927 2928 /* If global sizes are set, check if they are consistent with that given in the file */ 2929 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2930 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2931 2932 /* determine ownership of all (block) rows */ 2933 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2934 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2935 else m = newMat->rmap->n; /* Set by user */ 2936 2937 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2938 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2939 2940 /* First process needs enough room for process with most rows */ 2941 if (!rank) { 2942 mmax = rowners[1]; 2943 for (i=2; i<=size; i++) { 2944 mmax = PetscMax(mmax, rowners[i]); 2945 } 2946 } else mmax = -1; /* unused, but compilers complain */ 2947 2948 rowners[0] = 0; 2949 for (i=2; i<=size; i++) { 2950 rowners[i] += rowners[i-1]; 2951 } 2952 rstart = rowners[rank]; 2953 rend = rowners[rank+1]; 2954 2955 /* distribute row lengths to all processors */ 2956 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2957 if (!rank) { 2958 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2959 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2960 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2961 for (j=0; j<m; j++) { 2962 procsnz[0] += ourlens[j]; 2963 } 2964 for (i=1; i<size; i++) { 2965 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2966 /* calculate the number of nonzeros on each processor */ 2967 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2968 procsnz[i] += rowlengths[j]; 2969 } 2970 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2971 } 2972 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2973 } else { 2974 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2975 } 2976 2977 if (!rank) { 2978 /* determine max buffer needed and allocate it */ 2979 maxnz = 0; 2980 for (i=0; i<size; i++) { 2981 maxnz = PetscMax(maxnz,procsnz[i]); 2982 } 2983 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2984 2985 /* read in my part of the matrix column indices */ 2986 nz = procsnz[0]; 2987 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2988 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2989 2990 /* read in every one elses and ship off */ 2991 for (i=1; i<size; i++) { 2992 nz = procsnz[i]; 2993 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2994 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2995 } 2996 ierr = PetscFree(cols);CHKERRQ(ierr); 2997 } else { 2998 /* determine buffer space needed for message */ 2999 nz = 0; 3000 for (i=0; i<m; i++) { 3001 nz += ourlens[i]; 3002 } 3003 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3004 3005 /* receive message of column indices*/ 3006 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3007 } 3008 3009 /* 
determine column ownership if matrix is not square */ 3010 if (N != M) { 3011 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3012 else n = newMat->cmap->n; 3013 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3014 cstart = cend - n; 3015 } else { 3016 cstart = rstart; 3017 cend = rend; 3018 n = cend - cstart; 3019 } 3020 3021 /* loop over local rows, determining number of off diagonal entries */ 3022 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3023 jj = 0; 3024 for (i=0; i<m; i++) { 3025 for (j=0; j<ourlens[i]; j++) { 3026 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3027 jj++; 3028 } 3029 } 3030 3031 for (i=0; i<m; i++) { 3032 ourlens[i] -= offlens[i]; 3033 } 3034 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3035 3036 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3037 3038 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3039 3040 for (i=0; i<m; i++) { 3041 ourlens[i] += offlens[i]; 3042 } 3043 3044 if (!rank) { 3045 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3046 3047 /* read in my part of the matrix numerical values */ 3048 nz = procsnz[0]; 3049 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3050 3051 /* insert into matrix */ 3052 jj = rstart; 3053 smycols = mycols; 3054 svals = vals; 3055 for (i=0; i<m; i++) { 3056 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3057 smycols += ourlens[i]; 3058 svals += ourlens[i]; 3059 jj++; 3060 } 3061 3062 /* read in other processors and ship out */ 3063 for (i=1; i<size; i++) { 3064 nz = procsnz[i]; 3065 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3066 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3067 } 3068 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3069 } else { 3070 /* receive numeric values */ 3071 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3072 3073 /* receive message of values*/ 3074 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3075 3076 /* insert into matrix */ 3077 jj = rstart; 3078 smycols = mycols; 3079 svals = vals; 3080 for (i=0; i<m; i++) { 3081 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3082 smycols += ourlens[i]; 3083 svals += ourlens[i]; 3084 jj++; 3085 } 3086 } 3087 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3088 ierr = PetscFree(vals);CHKERRQ(ierr); 3089 ierr = PetscFree(mycols);CHKERRQ(ierr); 3090 ierr = PetscFree(rowners);CHKERRQ(ierr); 3091 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3092 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3093 PetscFunctionReturn(0); 3094 } 3095 3096 /* Not scalable because of ISAllGather() unless getting all columns. 
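   ISAllGather() leaves every process holding a sequential IS with the complete list of requested
   columns, so per-process memory grows with the global number of selected columns rather than the
   local one.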
*/ 3097 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3098 { 3099 PetscErrorCode ierr; 3100 IS iscol_local; 3101 PetscBool isstride; 3102 PetscMPIInt lisstride=0,gisstride; 3103 3104 PetscFunctionBegin; 3105 /* check if we are grabbing all columns*/ 3106 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3107 3108 if (isstride) { 3109 PetscInt start,len,mstart,mlen; 3110 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3111 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3112 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3113 if (mstart == start && mlen-mstart == len) lisstride = 1; 3114 } 3115 3116 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3117 if (gisstride) { 3118 PetscInt N; 3119 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3120 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3121 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3122 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3123 } else { 3124 PetscInt cbs; 3125 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3126 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3127 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3128 } 3129 3130 *isseq = iscol_local; 3131 PetscFunctionReturn(0); 3132 } 3133 3134 /* 3135 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3136 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3137 3138 Input Parameters: 3139 mat - matrix 3140 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3141 i.e., mat->rstart <= isrow[i] < mat->rend 3142 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3143 i.e., mat->cstart <= iscol[i] < mat->cend 3144 Output Parameter: 3145 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3146 iscol_o - sequential column index set for retrieving mat->B 3147 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3148 */ 3149 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3150 { 3151 PetscErrorCode ierr; 3152 Vec x,cmap; 3153 const PetscInt *is_idx; 3154 PetscScalar *xarray,*cmaparray; 3155 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3156 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3157 Mat B=a->B; 3158 Vec lvec=a->lvec,lcmap; 3159 PetscInt i,cstart,cend,Bn=B->cmap->N; 3160 MPI_Comm comm; 3161 VecScatter Mvctx=a->Mvctx; 3162 3163 PetscFunctionBegin; 3164 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3165 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3166 3167 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3168 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3169 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3170 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3171 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3172 3173 /* Get start indices */ 3174 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3175 isstart -= ncols; 3176 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3177 3178 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3179 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3180 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3181 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3182 for (i=0; i<ncols; i++) { 3183 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3184 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3185 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3186 } 3187 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3188 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3189 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3190 3191 /* Get iscol_d */ 3192 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3193 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3194 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3195 3196 /* Get isrow_d */ 3197 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3198 rstart = mat->rmap->rstart; 3199 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3200 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3201 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3202 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3203 3204 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3205 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3206 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3207 3208 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3209 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3210 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3211 3212 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3213 3214 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3215 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3216 3217 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3218 /* off-process column indices */ 3219 count = 0; 3220 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3221 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3222 3223 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3224 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3225 for (i=0; i<Bn; i++) { 3226 if (PetscRealPart(xarray[i]) > -1.0) { 3227 idx[count] = i; /* local column index in off-diagonal part B */ 3228 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3229 count++; 3230 } 3231 } 3232 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3233 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3234 3235 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3236 /* cannot ensure iscol_o has same blocksize as iscol! 
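     iscol_o selects only the off-process subset of iscol that shows up in B, and that subset need
     not line up with the block structure of iscol.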
*/ 3237 3238 ierr = PetscFree(idx);CHKERRQ(ierr); 3239 *garray = cmap1; 3240 3241 ierr = VecDestroy(&x);CHKERRQ(ierr); 3242 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3243 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3244 PetscFunctionReturn(0); 3245 } 3246 3247 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3248 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3249 { 3250 PetscErrorCode ierr; 3251 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3252 Mat M = NULL; 3253 MPI_Comm comm; 3254 IS iscol_d,isrow_d,iscol_o; 3255 Mat Asub = NULL,Bsub = NULL; 3256 PetscInt n; 3257 3258 PetscFunctionBegin; 3259 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3260 3261 if (call == MAT_REUSE_MATRIX) { 3262 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3263 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3264 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3265 3266 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3267 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3268 3269 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3270 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3271 3272 /* Update diagonal and off-diagonal portions of submat */ 3273 asub = (Mat_MPIAIJ*)(*submat)->data; 3274 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3275 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3276 if (n) { 3277 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3278 } 3279 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3280 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3281 3282 } else { /* call == MAT_INITIAL_MATRIX) */ 3283 const PetscInt *garray; 3284 PetscInt BsubN; 3285 3286 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
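       isrow_d/iscol_d index the diagonal block a->A in local numbering, while iscol_o and garray
       describe the requested columns of the off-diagonal block a->B.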
*/ 3287 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3288 3289 /* Create local submatrices Asub and Bsub */ 3290 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3291 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3292 3293 /* Create submatrix M */ 3294 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3295 3296 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3297 asub = (Mat_MPIAIJ*)M->data; 3298 3299 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3300 n = asub->B->cmap->N; 3301 if (BsubN > n) { 3302 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3303 const PetscInt *idx; 3304 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3305 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3306 3307 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3308 j = 0; 3309 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3310 for (i=0; i<n; i++) { 3311 if (j >= BsubN) break; 3312 while (subgarray[i] > garray[j]) j++; 3313 3314 if (subgarray[i] == garray[j]) { 3315 idx_new[i] = idx[j++]; 3316 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3317 } 3318 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3319 3320 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3321 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3322 3323 } else if (BsubN < n) { 3324 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3325 } 3326 3327 ierr = PetscFree(garray);CHKERRQ(ierr); 3328 *submat = M; 3329 3330 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3331 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3332 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3333 3334 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3335 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3336 3337 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3338 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3339 } 3340 PetscFunctionReturn(0); 3341 } 3342 3343 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3344 { 3345 PetscErrorCode ierr; 3346 IS iscol_local=NULL,isrow_d; 3347 PetscInt csize; 3348 PetscInt n,i,j,start,end; 3349 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3350 MPI_Comm comm; 3351 3352 PetscFunctionBegin; 3353 /* If isrow has same processor distribution as mat, 3354 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3355 if (call == MAT_REUSE_MATRIX) { 3356 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3357 if (isrow_d) { 3358 sameRowDist = PETSC_TRUE; 3359 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3360 } else { 3361 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3362 if (iscol_local) { 3363 sameRowDist = PETSC_TRUE; 3364 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3365 } 3366 } 3367 } else { 3368 /* Check if isrow has same processor distribution as mat */ 3369 sameDist[0] 
= PETSC_FALSE; 3370 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3371 if (!n) { 3372 sameDist[0] = PETSC_TRUE; 3373 } else { 3374 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3375 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3376 if (i >= start && j < end) { 3377 sameDist[0] = PETSC_TRUE; 3378 } 3379 } 3380 3381 /* Check if iscol has same processor distribution as mat */ 3382 sameDist[1] = PETSC_FALSE; 3383 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3384 if (!n) { 3385 sameDist[1] = PETSC_TRUE; 3386 } else { 3387 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3388 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3389 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3390 } 3391 3392 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3393 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3394 sameRowDist = tsameDist[0]; 3395 } 3396 3397 if (sameRowDist) { 3398 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3399 /* isrow and iscol have same processor distribution as mat */ 3400 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3401 PetscFunctionReturn(0); 3402 } else { /* sameRowDist */ 3403 /* isrow has same processor distribution as mat */ 3404 if (call == MAT_INITIAL_MATRIX) { 3405 PetscBool sorted; 3406 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3407 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3408 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3409 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3410 3411 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3412 if (sorted) { 3413 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3414 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3415 PetscFunctionReturn(0); 3416 } 3417 } else { /* call == MAT_REUSE_MATRIX */ 3418 IS iscol_sub; 3419 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3420 if (iscol_sub) { 3421 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3422 PetscFunctionReturn(0); 3423 } 3424 } 3425 } 3426 } 3427 3428 /* General case: iscol -> iscol_local which has global size of iscol */ 3429 if (call == MAT_REUSE_MATRIX) { 3430 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3431 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3432 } else { 3433 if (!iscol_local) { 3434 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3435 } 3436 } 3437 3438 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3439 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3440 3441 if (call == MAT_INITIAL_MATRIX) { 3442 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3443 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3444 } 3445 PetscFunctionReturn(0); 3446 } 3447 3448 /*@C 3449 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3450 and "off-diagonal" part of the matrix in CSR format. 3451 3452 Collective on MPI_Comm 3453 3454 Input Parameters: 3455 + comm - MPI communicator 3456 . 
A - "diagonal" portion of matrix 3457 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3458 - garray - global index of B columns 3459 3460 Output Parameter: 3461 . mat - the matrix, with input A as its local diagonal matrix 3462 Level: advanced 3463 3464 Notes: 3465 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3466 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3467 3468 .seealso: MatCreateMPIAIJWithSplitArrays() 3469 @*/ 3470 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3471 { 3472 PetscErrorCode ierr; 3473 Mat_MPIAIJ *maij; 3474 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3475 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3476 PetscScalar *oa=b->a; 3477 Mat Bnew; 3478 PetscInt m,n,N; 3479 3480 PetscFunctionBegin; 3481 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3482 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3483 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3484 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3485 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3486 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3487 3488 /* Get global columns of mat */ 3489 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3490 3491 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3492 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3493 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3494 maij = (Mat_MPIAIJ*)(*mat)->data; 3495 3496 (*mat)->preallocated = PETSC_TRUE; 3497 3498 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3499 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3500 3501 /* Set A as diagonal portion of *mat */ 3502 maij->A = A; 3503 3504 nz = oi[m]; 3505 for (i=0; i<nz; i++) { 3506 col = oj[i]; 3507 oj[i] = garray[col]; 3508 } 3509 3510 /* Set Bnew as off-diagonal portion of *mat */ 3511 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3512 bnew = (Mat_SeqAIJ*)Bnew->data; 3513 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3514 maij->B = Bnew; 3515 3516 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3517 3518 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3519 b->free_a = PETSC_FALSE; 3520 b->free_ij = PETSC_FALSE; 3521 ierr = MatDestroy(&B);CHKERRQ(ierr); 3522 3523 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3524 bnew->free_a = PETSC_TRUE; 3525 bnew->free_ij = PETSC_TRUE; 3526 3527 /* condense columns of maij->B */ 3528 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3529 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3530 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3531 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3532 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3533 PetscFunctionReturn(0); 3534 } 3535 3536 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3537 
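/*
  A minimal usage sketch for MatCreateMPIAIJWithSeqAIJ() above; this is an illustration only, and
  the sizes, preallocation counts, and array contents are placeholders. Each process builds its
  "diagonal" block A (m x n, columns in local numbering) and its "off-diagonal" block B (m x nB,
  columns compacted to 0..nB-1 and mapped to global columns by garray[]), assembles both, and then
  hands them over. A becomes part of C and B is destroyed, so neither may be used afterwards:

      Mat      A,B,C;
      PetscInt garray[] = {...};      global column of each of the nB local columns of B

      MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,0,dnnz,&A);
      MatCreateSeqAIJ(PETSC_COMM_SELF,m,nB,0,onnz,&B);
      ... fill A and B with MatSetValues() and assemble them ...
      MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,A,B,garray,&C);
*/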
3538 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3539 { 3540 PetscErrorCode ierr; 3541 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3542 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3543 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3544 Mat M,Msub,B=a->B; 3545 MatScalar *aa; 3546 Mat_SeqAIJ *aij; 3547 PetscInt *garray = a->garray,*colsub,Ncols; 3548 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3549 IS iscol_sub,iscmap; 3550 const PetscInt *is_idx,*cmap; 3551 PetscBool allcolumns=PETSC_FALSE; 3552 MPI_Comm comm; 3553 3554 PetscFunctionBegin; 3555 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3556 3557 if (call == MAT_REUSE_MATRIX) { 3558 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3559 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3560 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3561 3562 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3563 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3564 3565 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3566 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3567 3568 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3569 3570 } else { /* call == MAT_INITIAL_MATRIX) */ 3571 PetscBool flg; 3572 3573 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3574 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3575 3576 /* (1) iscol -> nonscalable iscol_local */ 3577 /* Check for special case: each processor gets entire matrix columns */ 3578 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3579 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3580 if (allcolumns) { 3581 iscol_sub = iscol_local; 3582 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3583 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3584 3585 } else { 3586 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3587 PetscInt *idx,*cmap1,k; 3588 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3589 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3590 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3591 count = 0; 3592 k = 0; 3593 for (i=0; i<Ncols; i++) { 3594 j = is_idx[i]; 3595 if (j >= cstart && j < cend) { 3596 /* diagonal part of mat */ 3597 idx[count] = j; 3598 cmap1[count++] = i; /* column index in submat */ 3599 } else if (Bn) { 3600 /* off-diagonal part of mat */ 3601 if (j == garray[k]) { 3602 idx[count] = j; 3603 cmap1[count++] = i; /* column index in submat */ 3604 } else if (j > garray[k]) { 3605 while (j > garray[k] && k < Bn-1) k++; 3606 if (j == garray[k]) { 3607 idx[count] = j; 3608 cmap1[count++] = i; /* column index in submat */ 3609 } 3610 } 3611 } 3612 } 3613 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3614 3615 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3616 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3617 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3618 3619 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3620 } 3621 3622 /* (3) Create sequential Msub */ 3623 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3624 } 3625 3626 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3627 aij = (Mat_SeqAIJ*)(Msub)->data; 3628 ii = aij->i; 3629 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3630 3631 /* 3632 m - number of local rows 3633 Ncols - number of columns (same on all processors) 3634 rstart - first row in new global matrix generated 3635 */ 3636 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3637 3638 if (call == MAT_INITIAL_MATRIX) { 3639 /* (4) Create parallel newmat */ 3640 PetscMPIInt rank,size; 3641 PetscInt csize; 3642 3643 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3644 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3645 3646 /* 3647 Determine the number of non-zeros in the diagonal and off-diagonal 3648 portions of the matrix in order to do correct preallocation 3649 */ 3650 3651 /* first get start and end of "diagonal" columns */ 3652 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3653 if (csize == PETSC_DECIDE) { 3654 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3655 if (mglobal == Ncols) { /* square matrix */ 3656 nlocal = m; 3657 } else { 3658 nlocal = Ncols/size + ((Ncols % size) > rank); 3659 } 3660 } else { 3661 nlocal = csize; 3662 } 3663 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3664 rstart = rend - nlocal; 3665 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3666 3667 /* next, compute all the lengths */ 3668 jj = aij->j; 3669 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3670 olens = dlens + m; 3671 for (i=0; i<m; i++) { 3672 jend = ii[i+1] - ii[i]; 3673 olen = 0; 3674 dlen = 0; 3675 for (j=0; j<jend; j++) { 3676 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3677 else dlen++; 3678 jj++; 3679 } 3680 olens[i] = olen; 3681 dlens[i] = dlen; 3682 } 3683 3684 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3685 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3686 3687 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3688 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
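  /* M gets m local rows (the rows of the sequential Msub) and nlocal local columns, so its column
     ownership range matches the [rstart,rend) split used to count dlens/olens above */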
3689 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3690 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3691 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3692 ierr = PetscFree(dlens);CHKERRQ(ierr); 3693 3694 } else { /* call == MAT_REUSE_MATRIX */ 3695 M = *newmat; 3696 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3697 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3698 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3699 /* 3700 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3701 rather than the slower MatSetValues(). 3702 */ 3703 M->was_assembled = PETSC_TRUE; 3704 M->assembled = PETSC_FALSE; 3705 } 3706 3707 /* (5) Set values of Msub to *newmat */ 3708 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3709 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3710 3711 jj = aij->j; 3712 aa = aij->a; 3713 for (i=0; i<m; i++) { 3714 row = rstart + i; 3715 nz = ii[i+1] - ii[i]; 3716 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3717 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3718 jj += nz; aa += nz; 3719 } 3720 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3721 3722 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3723 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3724 3725 ierr = PetscFree(colsub);CHKERRQ(ierr); 3726 3727 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3728 if (call == MAT_INITIAL_MATRIX) { 3729 *newmat = M; 3730 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3731 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3732 3733 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3734 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3735 3736 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3737 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3738 3739 if (iscol_local) { 3740 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3741 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3742 } 3743 } 3744 PetscFunctionReturn(0); 3745 } 3746 3747 /* 3748 Not great since it makes two copies of the submatrix, first an SeqAIJ 3749 in local and then by concatenating the local matrices the end result. 3750 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3751 3752 Note: This requires a sequential iscol with all indices. 
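   The first copy is the sequential Mreuse holding this process's rows over all requested columns;
   the second is built when those rows are re-inserted into the parallel matrix with
   MatSetValues_MPIAIJ() below.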
3753 */ 3754 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3755 { 3756 PetscErrorCode ierr; 3757 PetscMPIInt rank,size; 3758 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3759 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3760 Mat M,Mreuse; 3761 MatScalar *aa,*vwork; 3762 MPI_Comm comm; 3763 Mat_SeqAIJ *aij; 3764 PetscBool colflag,allcolumns=PETSC_FALSE; 3765 3766 PetscFunctionBegin; 3767 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3768 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3769 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3770 3771 /* Check for special case: each processor gets entire matrix columns */ 3772 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3773 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3774 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3775 3776 if (call == MAT_REUSE_MATRIX) { 3777 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3778 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3779 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3780 } else { 3781 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3782 } 3783 3784 /* 3785 m - number of local rows 3786 n - number of columns (same on all processors) 3787 rstart - first row in new global matrix generated 3788 */ 3789 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3790 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3791 if (call == MAT_INITIAL_MATRIX) { 3792 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3793 ii = aij->i; 3794 jj = aij->j; 3795 3796 /* 3797 Determine the number of non-zeros in the diagonal and off-diagonal 3798 portions of the matrix in order to do correct preallocation 3799 */ 3800 3801 /* first get start and end of "diagonal" columns */ 3802 if (csize == PETSC_DECIDE) { 3803 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3804 if (mglobal == n) { /* square matrix */ 3805 nlocal = m; 3806 } else { 3807 nlocal = n/size + ((n % size) > rank); 3808 } 3809 } else { 3810 nlocal = csize; 3811 } 3812 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3813 rstart = rend - nlocal; 3814 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3815 3816 /* next, compute all the lengths */ 3817 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3818 olens = dlens + m; 3819 for (i=0; i<m; i++) { 3820 jend = ii[i+1] - ii[i]; 3821 olen = 0; 3822 dlen = 0; 3823 for (j=0; j<jend; j++) { 3824 if (*jj < rstart || *jj >= rend) olen++; 3825 else dlen++; 3826 jj++; 3827 } 3828 olens[i] = olen; 3829 dlens[i] = dlen; 3830 } 3831 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3832 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3833 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3834 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3835 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3836 ierr = PetscFree(dlens);CHKERRQ(ierr); 3837 } else { 3838 PetscInt ml,nl; 3839 3840 M = *newmat; 3841 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3842 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3843 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3844 /* 3845 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3846 rather than the slower MatSetValues(). 3847 */ 3848 M->was_assembled = PETSC_TRUE; 3849 M->assembled = PETSC_FALSE; 3850 } 3851 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3852 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3853 ii = aij->i; 3854 jj = aij->j; 3855 aa = aij->a; 3856 for (i=0; i<m; i++) { 3857 row = rstart + i; 3858 nz = ii[i+1] - ii[i]; 3859 cwork = jj; jj += nz; 3860 vwork = aa; aa += nz; 3861 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3862 } 3863 3864 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3865 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3866 *newmat = M; 3867 3868 /* save submatrix used in processor for next request */ 3869 if (call == MAT_INITIAL_MATRIX) { 3870 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3871 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3872 } 3873 PetscFunctionReturn(0); 3874 } 3875 3876 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3877 { 3878 PetscInt m,cstart, cend,j,nnz,i,d; 3879 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3880 const PetscInt *JJ; 3881 PetscScalar *values; 3882 PetscErrorCode ierr; 3883 PetscBool nooffprocentries; 3884 3885 PetscFunctionBegin; 3886 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3887 3888 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3889 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3890 m = B->rmap->n; 3891 cstart = B->cmap->rstart; 3892 cend = B->cmap->rend; 3893 rstart = B->rmap->rstart; 3894 3895 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3896 3897 #if defined(PETSC_USE_DEBUG) 3898 for (i=0; i<m; i++) { 3899 nnz = Ii[i+1]- Ii[i]; 3900 JJ = J + Ii[i]; 3901 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3902 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3903 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3904 } 3905 #endif 3906 3907 for (i=0; i<m; i++) { 3908 nnz = Ii[i+1]- Ii[i]; 3909 JJ = J + Ii[i]; 3910 nnz_max = PetscMax(nnz_max,nnz); 3911 d = 0; 3912 for (j=0; j<nnz; j++) { 3913 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3914 } 3915 d_nnz[i] = d; 3916 o_nnz[i] = nnz - d; 3917 } 3918 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3919 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3920 3921 if (v) values = (PetscScalar*)v; 3922 else { 3923 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3924 } 3925 3926 for (i=0; i<m; i++) { 3927 ii = i + rstart; 3928 nnz = Ii[i+1]- Ii[i]; 3929 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3930 } 3931 nooffprocentries = B->nooffprocentries; 3932 B->nooffprocentries = PETSC_TRUE; 3933 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3934 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3935 B->nooffprocentries = nooffprocentries; 3936 3937 if (!v) { 3938 ierr = PetscFree(values);CHKERRQ(ierr); 3939 } 3940 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3941 PetscFunctionReturn(0); 3942 } 3943 3944 /*@ 3945 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3946 (the default parallel PETSc format). 3947 3948 Collective on MPI_Comm 3949 3950 Input Parameters: 3951 + B - the matrix 3952 . i - the indices into j for the start of each local row (starts with zero) 3953 . j - the column indices for each local row (starts with zero) 3954 - v - optional values in the matrix 3955 3956 Level: developer 3957 3958 Notes: 3959 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3960 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3961 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3962 3963 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3964 3965 The format which is used for the sparse matrix input, is equivalent to a 3966 row-major ordering.. i.e for the following matrix, the input data expected is 3967 as shown 3968 3969 $ 1 0 0 3970 $ 2 0 3 P0 3971 $ ------- 3972 $ 4 5 6 P1 3973 $ 3974 $ Process0 [P0]: rows_owned=[0,1] 3975 $ i = {0,1,3} [size = nrow+1 = 2+1] 3976 $ j = {0,0,2} [size = 3] 3977 $ v = {1,2,3} [size = 3] 3978 $ 3979 $ Process1 [P1]: rows_owned=[2] 3980 $ i = {0,3} [size = nrow+1 = 1+1] 3981 $ j = {0,1,2} [size = 3] 3982 $ v = {4,5,6} [size = 3] 3983 3984 .keywords: matrix, aij, compressed row, sparse, parallel 3985 3986 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3987 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3988 @*/ 3989 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3990 { 3991 PetscErrorCode ierr; 3992 3993 PetscFunctionBegin; 3994 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3995 PetscFunctionReturn(0); 3996 } 3997 3998 /*@C 3999 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4000 (the default parallel PETSc format). For good matrix assembly performance 4001 the user should preallocate the matrix storage by setting the parameters 4002 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4003 performance can be increased by more than a factor of 50. 4004 4005 Collective on MPI_Comm 4006 4007 Input Parameters: 4008 + B - the matrix 4009 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4010 (same value is used for all local rows) 4011 . d_nnz - array containing the number of nonzeros in the various rows of the 4012 DIAGONAL portion of the local submatrix (possibly different for each row) 4013 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4014 The size of this array is equal to the number of local rows, i.e 'm'. 
4015 For matrices that will be factored, you must leave room for (and set) 4016 the diagonal entry even if it is zero. 4017 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4018 submatrix (same value is used for all local rows). 4019 - o_nnz - array containing the number of nonzeros in the various rows of the 4020 OFF-DIAGONAL portion of the local submatrix (possibly different for 4021 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4022 structure. The size of this array is equal to the number 4023 of local rows, i.e 'm'. 4024 4025 If the *_nnz parameter is given then the *_nz parameter is ignored 4026 4027 The AIJ format (also called the Yale sparse matrix format or 4028 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4029 storage. The stored row and column indices begin with zero. 4030 See Users-Manual: ch_mat for details. 4031 4032 The parallel matrix is partitioned such that the first m0 rows belong to 4033 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4034 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4035 4036 The DIAGONAL portion of the local submatrix of a processor can be defined 4037 as the submatrix which is obtained by extraction the part corresponding to 4038 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4039 first row that belongs to the processor, r2 is the last row belonging to 4040 the this processor, and c1-c2 is range of indices of the local part of a 4041 vector suitable for applying the matrix to. This is an mxn matrix. In the 4042 common case of a square matrix, the row and column ranges are the same and 4043 the DIAGONAL part is also square. The remaining portion of the local 4044 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4045 4046 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4047 4048 You can call MatGetInfo() to get information on how effective the preallocation was; 4049 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4050 You can also run with the option -info and look for messages with the string 4051 malloc in them to see if additional memory allocation was needed. 4052 4053 Example usage: 4054 4055 Consider the following 8x8 matrix with 34 non-zero values, that is 4056 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4057 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4058 as follows: 4059 4060 .vb 4061 1 2 0 | 0 3 0 | 0 4 4062 Proc0 0 5 6 | 7 0 0 | 8 0 4063 9 0 10 | 11 0 0 | 12 0 4064 ------------------------------------- 4065 13 0 14 | 15 16 17 | 0 0 4066 Proc1 0 18 0 | 19 20 21 | 0 0 4067 0 0 0 | 22 23 0 | 24 0 4068 ------------------------------------- 4069 Proc2 25 26 27 | 0 0 28 | 29 0 4070 30 0 0 | 31 32 33 | 0 34 4071 .ve 4072 4073 This can be represented as a collection of submatrices as: 4074 4075 .vb 4076 A B C 4077 D E F 4078 G H I 4079 .ve 4080 4081 Where the submatrices A,B,C are owned by proc0, D,E,F are 4082 owned by proc1, G,H,I are owned by proc2. 4083 4084 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4085 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4086 The 'M','N' parameters are 8,8, and have the same values on all procs. 4087 4088 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4089 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4090 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
4091 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4092 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4093 matrix, ans [DF] as another SeqAIJ matrix. 4094 4095 When d_nz, o_nz parameters are specified, d_nz storage elements are 4096 allocated for every row of the local diagonal submatrix, and o_nz 4097 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4098 One way to choose d_nz and o_nz is to use the max nonzerors per local 4099 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4100 In this case, the values of d_nz,o_nz are: 4101 .vb 4102 proc0 : dnz = 2, o_nz = 2 4103 proc1 : dnz = 3, o_nz = 2 4104 proc2 : dnz = 1, o_nz = 4 4105 .ve 4106 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4107 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4108 for proc3. i.e we are using 12+15+10=37 storage locations to store 4109 34 values. 4110 4111 When d_nnz, o_nnz parameters are specified, the storage is specified 4112 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4113 In the above case the values for d_nnz,o_nnz are: 4114 .vb 4115 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4116 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4117 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4118 .ve 4119 Here the space allocated is sum of all the above values i.e 34, and 4120 hence pre-allocation is perfect. 4121 4122 Level: intermediate 4123 4124 .keywords: matrix, aij, compressed row, sparse, parallel 4125 4126 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4127 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4128 @*/ 4129 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4130 { 4131 PetscErrorCode ierr; 4132 4133 PetscFunctionBegin; 4134 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4135 PetscValidType(B,1); 4136 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4137 PetscFunctionReturn(0); 4138 } 4139 4140 /*@ 4141 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4142 CSR format the local rows. 4143 4144 Collective on MPI_Comm 4145 4146 Input Parameters: 4147 + comm - MPI communicator 4148 . m - number of local rows (Cannot be PETSC_DECIDE) 4149 . n - This value should be the same as the local size used in creating the 4150 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4151 calculated if N is given) For square matrices n is almost always m. 4152 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4153 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4154 . i - row indices 4155 . j - column indices 4156 - a - matrix values 4157 4158 Output Parameter: 4159 . mat - the matrix 4160 4161 Level: intermediate 4162 4163 Notes: 4164 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4165 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4166 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4167 4168 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
4169 4170 The format which is used for the sparse matrix input, is equivalent to a 4171 row-major ordering.. i.e for the following matrix, the input data expected is 4172 as shown 4173 4174 $ 1 0 0 4175 $ 2 0 3 P0 4176 $ ------- 4177 $ 4 5 6 P1 4178 $ 4179 $ Process0 [P0]: rows_owned=[0,1] 4180 $ i = {0,1,3} [size = nrow+1 = 2+1] 4181 $ j = {0,0,2} [size = 3] 4182 $ v = {1,2,3} [size = 3] 4183 $ 4184 $ Process1 [P1]: rows_owned=[2] 4185 $ i = {0,3} [size = nrow+1 = 1+1] 4186 $ j = {0,1,2} [size = 3] 4187 $ v = {4,5,6} [size = 3] 4188 4189 .keywords: matrix, aij, compressed row, sparse, parallel 4190 4191 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4192 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4193 @*/ 4194 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4195 { 4196 PetscErrorCode ierr; 4197 4198 PetscFunctionBegin; 4199 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4200 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4201 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4202 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4203 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4204 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4205 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4206 PetscFunctionReturn(0); 4207 } 4208 4209 /*@C 4210 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4211 (the default parallel PETSc format). For good matrix assembly performance 4212 the user should preallocate the matrix storage by setting the parameters 4213 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4214 performance can be increased by more than a factor of 50. 4215 4216 Collective on MPI_Comm 4217 4218 Input Parameters: 4219 + comm - MPI communicator 4220 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4221 This value should be the same as the local size used in creating the 4222 y vector for the matrix-vector product y = Ax. 4223 . n - This value should be the same as the local size used in creating the 4224 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4225 calculated if N is given) For square matrices n is almost always m. 4226 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4227 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4228 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4229 (same value is used for all local rows) 4230 . d_nnz - array containing the number of nonzeros in the various rows of the 4231 DIAGONAL portion of the local submatrix (possibly different for each row) 4232 or NULL, if d_nz is used to specify the nonzero structure. 4233 The size of this array is equal to the number of local rows, i.e 'm'. 4234 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4235 submatrix (same value is used for all local rows). 4236 - o_nnz - array containing the number of nonzeros in the various rows of the 4237 OFF-DIAGONAL portion of the local submatrix (possibly different for 4238 each row) or NULL, if o_nz is used to specify the nonzero 4239 structure. 
The size of this array is equal to the number 4240 of local rows, i.e 'm'. 4241 4242 Output Parameter: 4243 . A - the matrix 4244 4245 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4246 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4247 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4248 4249 Notes: 4250 If the *_nnz parameter is given then the *_nz parameter is ignored 4251 4252 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4253 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4254 storage requirements for this matrix. 4255 4256 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4257 processor than it must be used on all processors that share the object for 4258 that argument. 4259 4260 The user MUST specify either the local or global matrix dimensions 4261 (possibly both). 4262 4263 The parallel matrix is partitioned across processors such that the 4264 first m0 rows belong to process 0, the next m1 rows belong to 4265 process 1, the next m2 rows belong to process 2 etc.. where 4266 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4267 values corresponding to [m x N] submatrix. 4268 4269 The columns are logically partitioned with the n0 columns belonging 4270 to 0th partition, the next n1 columns belonging to the next 4271 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4272 4273 The DIAGONAL portion of the local submatrix on any given processor 4274 is the submatrix corresponding to the rows and columns m,n 4275 corresponding to the given processor. i.e diagonal matrix on 4276 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4277 etc. The remaining portion of the local submatrix [m x (N-n)] 4278 constitute the OFF-DIAGONAL portion. The example below better 4279 illustrates this concept. 4280 4281 For a square global matrix we define each processor's diagonal portion 4282 to be its local rows and the corresponding columns (a square submatrix); 4283 each processor's off-diagonal portion encompasses the remainder of the 4284 local matrix (a rectangular submatrix). 4285 4286 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4287 4288 When calling this routine with a single process communicator, a matrix of 4289 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4290 type of communicator, use the construction mechanism 4291 .vb 4292 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4293 .ve 4294 4295 $ MatCreate(...,&A); 4296 $ MatSetType(A,MATMPIAIJ); 4297 $ MatSetSizes(A, m,n,M,N); 4298 $ MatMPIAIJSetPreallocation(A,...); 4299 4300 By default, this format uses inodes (identical nodes) when possible. 4301 We search for consecutive rows with the same nonzero structure, thereby 4302 reusing matrix information to achieve increased efficiency. 4303 4304 Options Database Keys: 4305 + -mat_no_inode - Do not use inodes 4306 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4307 4308 4309 4310 Example usage: 4311 4312 Consider the following 8x8 matrix with 34 non-zero values, that is 4313 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4314 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4315 as follows 4316 4317 .vb 4318 1 2 0 | 0 3 0 | 0 4 4319 Proc0 0 5 6 | 7 0 0 | 8 0 4320 9 0 10 | 11 0 0 | 12 0 4321 ------------------------------------- 4322 13 0 14 | 15 16 17 | 0 0 4323 Proc1 0 18 0 | 19 20 21 | 0 0 4324 0 0 0 | 22 23 0 | 24 0 4325 ------------------------------------- 4326 Proc2 25 26 27 | 0 0 28 | 29 0 4327 30 0 0 | 31 32 33 | 0 34 4328 .ve 4329 4330 This can be represented as a collection of submatrices as 4331 4332 .vb 4333 A B C 4334 D E F 4335 G H I 4336 .ve 4337 4338 Where the submatrices A,B,C are owned by proc0, D,E,F are 4339 owned by proc1, G,H,I are owned by proc2. 4340 4341 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4342 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4343 The 'M','N' parameters are 8,8, and have the same values on all procs. 4344 4345 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4346 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4347 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4348 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4349 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4350 matrix, ans [DF] as another SeqAIJ matrix. 4351 4352 When d_nz, o_nz parameters are specified, d_nz storage elements are 4353 allocated for every row of the local diagonal submatrix, and o_nz 4354 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4355 One way to choose d_nz and o_nz is to use the max nonzerors per local 4356 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4357 In this case, the values of d_nz,o_nz are 4358 .vb 4359 proc0 : dnz = 2, o_nz = 2 4360 proc1 : dnz = 3, o_nz = 2 4361 proc2 : dnz = 1, o_nz = 4 4362 .ve 4363 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4364 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4365 for proc3. i.e we are using 12+15+10=37 storage locations to store 4366 34 values. 4367 4368 When d_nnz, o_nnz parameters are specified, the storage is specified 4369 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4370 In the above case the values for d_nnz,o_nnz are 4371 .vb 4372 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4373 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4374 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4375 .ve 4376 Here the space allocated is sum of all the above values i.e 34, and 4377 hence pre-allocation is perfect. 
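   As an illustration only (a sketch, with error checking omitted), process 1 of the example above
   could create its share of this 8x8 matrix with
.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     Mat      A;
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   and then insert its rows with MatSetValues() followed by MatAssemblyBegin()/MatAssemblyEnd().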
4378 4379 Level: intermediate 4380 4381 .keywords: matrix, aij, compressed row, sparse, parallel 4382 4383 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4384 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4385 @*/ 4386 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4387 { 4388 PetscErrorCode ierr; 4389 PetscMPIInt size; 4390 4391 PetscFunctionBegin; 4392 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4393 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4394 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4395 if (size > 1) { 4396 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4397 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4398 } else { 4399 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4400 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4401 } 4402 PetscFunctionReturn(0); 4403 } 4404 4405 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4406 { 4407 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4408 PetscBool flg; 4409 PetscErrorCode ierr; 4410 4411 PetscFunctionBegin; 4412 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4413 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4414 if (Ad) *Ad = a->A; 4415 if (Ao) *Ao = a->B; 4416 if (colmap) *colmap = a->garray; 4417 PetscFunctionReturn(0); 4418 } 4419 4420 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4421 { 4422 PetscErrorCode ierr; 4423 PetscInt m,N,i,rstart,nnz,Ii; 4424 PetscInt *indx; 4425 PetscScalar *values; 4426 4427 PetscFunctionBegin; 4428 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4429 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4430 PetscInt *dnz,*onz,sum,bs,cbs; 4431 4432 if (n == PETSC_DECIDE) { 4433 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4434 } 4435 /* Check sum(n) = N */ 4436 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4437 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4438 4439 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4440 rstart -= m; 4441 4442 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4443 for (i=0; i<m; i++) { 4444 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4445 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4446 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4447 } 4448 4449 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4450 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4451 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4452 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4453 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4454 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4455 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4456 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4457 } 4458 4459 /* numeric phase */ 4460 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4461 for (i=0; i<m; i++) { 4462 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4463 Ii = i + rstart; 4464 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
4465 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4466 } 4467 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4468 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4469 PetscFunctionReturn(0); 4470 } 4471 4472 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4473 { 4474 PetscErrorCode ierr; 4475 PetscMPIInt rank; 4476 PetscInt m,N,i,rstart,nnz; 4477 size_t len; 4478 const PetscInt *indx; 4479 PetscViewer out; 4480 char *name; 4481 Mat B; 4482 const PetscScalar *values; 4483 4484 PetscFunctionBegin; 4485 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4486 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4487 /* Should this be the type of the diagonal block of A? */ 4488 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4489 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4490 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4491 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4492 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4493 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4494 for (i=0; i<m; i++) { 4495 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4496 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4497 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4498 } 4499 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4500 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4501 4502 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4503 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4504 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4505 sprintf(name,"%s.%d",outfile,rank); 4506 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4507 ierr = PetscFree(name);CHKERRQ(ierr); 4508 ierr = MatView(B,out);CHKERRQ(ierr); 4509 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4510 ierr = MatDestroy(&B);CHKERRQ(ierr); 4511 PetscFunctionReturn(0); 4512 } 4513 4514 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4515 { 4516 PetscErrorCode ierr; 4517 Mat_Merge_SeqsToMPI *merge; 4518 PetscContainer container; 4519 4520 PetscFunctionBegin; 4521 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4522 if (container) { 4523 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4524 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4525 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4526 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4527 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4528 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4529 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4530 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4531 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4532 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4533 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4534 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4535 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4536 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4537 ierr = PetscFree(merge);CHKERRQ(ierr); 4538 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4539 } 4540 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4541 PetscFunctionReturn(0); 4542 } 4543 4544 #include <../src/mat/utils/freespace.h> 4545 #include <petscbt.h> 4546 4547 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4548 { 4549 PetscErrorCode ierr; 4550 MPI_Comm comm; 4551 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4552 PetscMPIInt 
size,rank,taga,*len_s; 4553 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4554 PetscInt proc,m; 4555 PetscInt **buf_ri,**buf_rj; 4556 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4557 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4558 MPI_Request *s_waits,*r_waits; 4559 MPI_Status *status; 4560 MatScalar *aa=a->a; 4561 MatScalar **abuf_r,*ba_i; 4562 Mat_Merge_SeqsToMPI *merge; 4563 PetscContainer container; 4564 4565 PetscFunctionBegin; 4566 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4567 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4568 4569 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4570 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4571 4572 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4573 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4574 4575 bi = merge->bi; 4576 bj = merge->bj; 4577 buf_ri = merge->buf_ri; 4578 buf_rj = merge->buf_rj; 4579 4580 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4581 owners = merge->rowmap->range; 4582 len_s = merge->len_s; 4583 4584 /* send and recv matrix values */ 4585 /*-----------------------------*/ 4586 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4587 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4588 4589 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4590 for (proc=0,k=0; proc<size; proc++) { 4591 if (!len_s[proc]) continue; 4592 i = owners[proc]; 4593 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4594 k++; 4595 } 4596 4597 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4598 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4599 ierr = PetscFree(status);CHKERRQ(ierr); 4600 4601 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4602 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4603 4604 /* insert mat values of mpimat */ 4605 /*----------------------------*/ 4606 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4607 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4608 4609 for (k=0; k<merge->nrecv; k++) { 4610 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4611 nrows = *(buf_ri_k[k]); 4612 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4613 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4614 } 4615 4616 /* set values of ba */ 4617 m = merge->rowmap->n; 4618 for (i=0; i<m; i++) { 4619 arow = owners[rank] + i; 4620 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4621 bnzi = bi[i+1] - bi[i]; 4622 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4623 4624 /* add local non-zero vals of this proc's seqmat into ba */ 4625 anzi = ai[arow+1] - ai[arow]; 4626 aj = a->j + ai[arow]; 4627 aa = a->a + ai[arow]; 4628 nextaj = 0; 4629 for (j=0; nextaj<anzi; j++) { 4630 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4631 ba_i[j] += aa[nextaj++]; 4632 } 4633 } 4634 4635 /* add received vals into ba */ 4636 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4637 /* i-th row */ 4638 if (i == *nextrow[k]) { 4639 anzi = *(nextai[k]+1) - *nextai[k]; 4640 aj = buf_rj[k] + *(nextai[k]); 4641 aa = abuf_r[k] + *(nextai[k]); 4642 nextaj = 0; 4643 for (j=0; nextaj<anzi; j++) { 4644 if (*(bj_i + j) == aj[nextaj]) { /* bcol 
== acol */ 4645 ba_i[j] += aa[nextaj++]; 4646 } 4647 } 4648 nextrow[k]++; nextai[k]++; 4649 } 4650 } 4651 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4652 } 4653 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4654 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4655 4656 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4657 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4658 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4659 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4660 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4661 PetscFunctionReturn(0); 4662 } 4663 4664 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4665 { 4666 PetscErrorCode ierr; 4667 Mat B_mpi; 4668 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4669 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4670 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4671 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4672 PetscInt len,proc,*dnz,*onz,bs,cbs; 4673 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4674 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4675 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4676 MPI_Status *status; 4677 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4678 PetscBT lnkbt; 4679 Mat_Merge_SeqsToMPI *merge; 4680 PetscContainer container; 4681 4682 PetscFunctionBegin; 4683 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4684 4685 /* make sure it is a PETSc comm */ 4686 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4687 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4688 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4689 4690 ierr = PetscNew(&merge);CHKERRQ(ierr); 4691 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4692 4693 /* determine row ownership */ 4694 /*---------------------------------------------------------*/ 4695 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4696 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4697 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4698 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4699 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4700 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4701 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4702 4703 m = merge->rowmap->n; 4704 owners = merge->rowmap->range; 4705 4706 /* determine the number of messages to send, their lengths */ 4707 /*---------------------------------------------------------*/ 4708 len_s = merge->len_s; 4709 4710 len = 0; /* length of buf_si[] */ 4711 merge->nsend = 0; 4712 for (proc=0; proc<size; proc++) { 4713 len_si[proc] = 0; 4714 if (proc == rank) { 4715 len_s[proc] = 0; 4716 } else { 4717 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4718 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4719 } 4720 if (len_s[proc]) { 4721 merge->nsend++; 4722 nrows = 0; 4723 for (i=owners[proc]; i<owners[proc+1]; i++) { 4724 if (ai[i+1] > ai[i]) nrows++; 4725 } 4726 len_si[proc] = 2*(nrows+1); 4727 len += len_si[proc]; 4728 } 4729 } 4730 4731 /* determine the number and length of messages to receive for ij-structure */ 4732 /*-------------------------------------------------------------------------*/ 4733 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4734 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4735 4736 /* post the Irecv of j-structure */ 4737 /*-------------------------------*/ 4738 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4739 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4740 4741 /* post the Isend of j-structure */ 4742 /*--------------------------------*/ 4743 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4744 4745 for (proc=0, k=0; proc<size; proc++) { 4746 if (!len_s[proc]) continue; 4747 i = owners[proc]; 4748 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4749 k++; 4750 } 4751 4752 /* receives and sends of j-structure are complete */ 4753 /*------------------------------------------------*/ 4754 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4755 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4756 4757 /* send and recv i-structure */ 4758 /*---------------------------*/ 4759 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4760 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4761 4762 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4763 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4764 for (proc=0,k=0; proc<size; proc++) { 4765 if (!len_s[proc]) continue; 4766 /* form outgoing message for i-structure: 4767 buf_si[0]: nrows to be sent 4768 [1:nrows]: row index (global) 4769 [nrows+1:2*nrows+1]: i-structure index 4770 */ 4771 /*-------------------------------------------*/ 4772 nrows = len_si[proc]/2 - 1; 4773 buf_si_i = buf_si + nrows+1; 4774 buf_si[0] = nrows; 4775 buf_si_i[0] = 0; 4776 nrows = 0; 4777 for (i=owners[proc]; i<owners[proc+1]; i++) { 4778 anzi = ai[i+1] - ai[i]; 4779 if (anzi) { 4780 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4781 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4782 nrows++; 4783 } 4784 } 4785 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4786 k++; 4787 buf_si += len_si[proc]; 4788 } 4789 4790 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4791 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4792 4793 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4794 for (i=0; i<merge->nrecv; i++) { 4795 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4796 } 4797 4798 ierr = PetscFree(len_si);CHKERRQ(ierr); 4799 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4800 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4801 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4802 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4803 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4804 ierr = PetscFree(status);CHKERRQ(ierr); 4805 4806 /* compute a local seq matrix in each processor */ 4807 /*----------------------------------------------*/ 4808 /* allocate bi array and free space for accumulating nonzero column info */ 4809 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4810 bi[0] = 0; 4811 4812 /* create and initialize a linked list */ 4813 nlnk = N+1; 4814 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4815 4816 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4817 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4818 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4819 4820 current_space = free_space; 4821 4822 /* determine symbolic info for each local row */ 4823 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4824 4825 for (k=0; k<merge->nrecv; k++) { 4826 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4827 nrows = *buf_ri_k[k]; 4828 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4829 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4830 } 4831 4832 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4833 len = 0; 4834 for (i=0; i<m; i++) { 4835 bnzi = 0; 4836 /* add local non-zero cols of this proc's seqmat into lnk */ 4837 arow = owners[rank] + i; 4838 anzi = ai[arow+1] - ai[arow]; 4839 aj = a->j + ai[arow]; 4840 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4841 bnzi += nlnk; 4842 /* add received col data into lnk */ 4843 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4844 if (i == *nextrow[k]) { /* i-th row */ 4845 anzi = *(nextai[k]+1) - *nextai[k]; 4846 aj = buf_rj[k] + *nextai[k]; 4847 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4848 bnzi += nlnk; 4849 nextrow[k]++; nextai[k]++; 4850 } 4851 } 4852 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4853 4854 /* if free space is not available, make more free space */ 4855 if (current_space->local_remaining<bnzi) { 4856 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4857 nspacedouble++; 4858 } 4859 /* copy data into free space, then initialize lnk */ 4860 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4861 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4862 4863 current_space->array += bnzi; 4864 current_space->local_used += bnzi; 4865 current_space->local_remaining -= bnzi; 4866 4867 bi[i+1] = bi[i] + bnzi; 4868 } 4869 4870 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4871 4872 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4873 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4874 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4875 4876 /* create symbolic parallel matrix B_mpi */ 4877 /*---------------------------------------*/ 4878 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4879 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4880 if (n==PETSC_DECIDE) { 4881 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4882 } else { 4883 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4884 } 4885 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4886 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4887 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4888 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4889 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4890 4891 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4892 B_mpi->assembled = PETSC_FALSE; 4893 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4894 merge->bi = bi; 4895 merge->bj = bj; 4896 merge->buf_ri = buf_ri; 4897 merge->buf_rj = buf_rj; 4898 merge->coi = NULL; 4899 merge->coj = NULL; 4900 merge->owners_co = NULL; 4901 4902 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4903 4904 /* attach the 
supporting struct to B_mpi for reuse */ 4905 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4906 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4907 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4908 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4909 *mpimat = B_mpi; 4910 4911 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4912 PetscFunctionReturn(0); 4913 } 4914 4915 /*@C 4916 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4917 matrices from each processor 4918 4919 Collective on MPI_Comm 4920 4921 Input Parameters: 4922 + comm - the communicators the parallel matrix will live on 4923 . seqmat - the input sequential matrices 4924 . m - number of local rows (or PETSC_DECIDE) 4925 . n - number of local columns (or PETSC_DECIDE) 4926 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4927 4928 Output Parameter: 4929 . mpimat - the parallel matrix generated 4930 4931 Level: advanced 4932 4933 Notes: 4934 The dimensions of the sequential matrix in each processor MUST be the same. 4935 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4936 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4937 @*/ 4938 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4939 { 4940 PetscErrorCode ierr; 4941 PetscMPIInt size; 4942 4943 PetscFunctionBegin; 4944 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4945 if (size == 1) { 4946 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4947 if (scall == MAT_INITIAL_MATRIX) { 4948 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4949 } else { 4950 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4951 } 4952 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4953 PetscFunctionReturn(0); 4954 } 4955 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4956 if (scall == MAT_INITIAL_MATRIX) { 4957 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4958 } 4959 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4960 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4961 PetscFunctionReturn(0); 4962 } 4963 4964 /*@ 4965 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4966 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4967 with MatGetSize() 4968 4969 Not Collective 4970 4971 Input Parameters: 4972 + A - the matrix 4973 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4974 4975 Output Parameter: 4976 . 
A_loc - the local sequential matrix generated 4977 4978 Level: developer 4979 4980 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4981 4982 @*/ 4983 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4984 { 4985 PetscErrorCode ierr; 4986 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4987 Mat_SeqAIJ *mat,*a,*b; 4988 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4989 MatScalar *aa,*ba,*cam; 4990 PetscScalar *ca; 4991 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4992 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4993 PetscBool match; 4994 MPI_Comm comm; 4995 PetscMPIInt size; 4996 4997 PetscFunctionBegin; 4998 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4999 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5000 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5001 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5002 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5003 5004 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5005 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5006 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5007 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5008 aa = a->a; ba = b->a; 5009 if (scall == MAT_INITIAL_MATRIX) { 5010 if (size == 1) { 5011 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5012 PetscFunctionReturn(0); 5013 } 5014 5015 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5016 ci[0] = 0; 5017 for (i=0; i<am; i++) { 5018 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5019 } 5020 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5021 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5022 k = 0; 5023 for (i=0; i<am; i++) { 5024 ncols_o = bi[i+1] - bi[i]; 5025 ncols_d = ai[i+1] - ai[i]; 5026 /* off-diagonal portion of A */ 5027 for (jo=0; jo<ncols_o; jo++) { 5028 col = cmap[*bj]; 5029 if (col >= cstart) break; 5030 cj[k] = col; bj++; 5031 ca[k++] = *ba++; 5032 } 5033 /* diagonal portion of A */ 5034 for (j=0; j<ncols_d; j++) { 5035 cj[k] = cstart + *aj++; 5036 ca[k++] = *aa++; 5037 } 5038 /* off-diagonal portion of A */ 5039 for (j=jo; j<ncols_o; j++) { 5040 cj[k] = cmap[*bj++]; 5041 ca[k++] = *ba++; 5042 } 5043 } 5044 /* put together the new matrix */ 5045 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5046 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5047 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5048 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5049 mat->free_a = PETSC_TRUE; 5050 mat->free_ij = PETSC_TRUE; 5051 mat->nonew = 0; 5052 } else if (scall == MAT_REUSE_MATRIX) { 5053 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5054 ci = mat->i; cj = mat->j; cam = mat->a; 5055 for (i=0; i<am; i++) { 5056 /* off-diagonal portion of A */ 5057 ncols_o = bi[i+1] - bi[i]; 5058 for (jo=0; jo<ncols_o; jo++) { 5059 col = cmap[*bj]; 5060 if (col >= cstart) break; 5061 *cam++ = *ba++; bj++; 5062 } 5063 /* diagonal portion of A */ 5064 ncols_d = ai[i+1] - ai[i]; 5065 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5066 /* off-diagonal portion of A */ 5067 for (j=jo; j<ncols_o; j++) { 5068 *cam++ = *ba++; bj++; 5069 } 5070 } 5071 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5072 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5073 PetscFunctionReturn(0); 5074 } 5075 5076 /*@C 5077 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5078 5079 Not Collective 5080 5081 Input Parameters: 5082 + A - the matrix 5083 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5084 - row, col - index sets of rows and columns to extract (or NULL) 5085 5086 Output Parameter: 5087 . A_loc - the local sequential matrix generated 5088 5089 Level: developer 5090 5091 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5092 5093 @*/ 5094 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5095 { 5096 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5097 PetscErrorCode ierr; 5098 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5099 IS isrowa,iscola; 5100 Mat *aloc; 5101 PetscBool match; 5102 5103 PetscFunctionBegin; 5104 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5105 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5106 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5107 if (!row) { 5108 start = A->rmap->rstart; end = A->rmap->rend; 5109 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5110 } else { 5111 isrowa = *row; 5112 } 5113 if (!col) { 5114 start = A->cmap->rstart; 5115 cmap = a->garray; 5116 nzA = a->A->cmap->n; 5117 nzB = a->B->cmap->n; 5118 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5119 ncols = 0; 5120 for (i=0; i<nzB; i++) { 5121 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5122 else break; 5123 } 5124 imark = i; 5125 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5126 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5127 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5128 } else { 5129 iscola = *col; 5130 } 5131 if (scall != MAT_INITIAL_MATRIX) { 5132 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5133 aloc[0] = *A_loc; 5134 } 5135 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5136 if (!col) { /* attach global id of condensed columns */ 5137 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5138 } 5139 *A_loc = aloc[0]; 5140 ierr = PetscFree(aloc);CHKERRQ(ierr); 5141 if (!row) { 5142 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5143 } 5144 if (!col) { 5145 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5146 } 5147 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5148 PetscFunctionReturn(0); 5149 } 5150 5151 /*@C 5152 MatGetBrowsOfAcols - Creates a 
SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5153 5154 Collective on Mat 5155 5156 Input Parameters: 5157 + A,B - the matrices in mpiaij format 5158 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5159 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5160 5161 Output Parameter: 5162 + rowb, colb - index sets of rows and columns of B to extract 5163 - B_seq - the sequential matrix generated 5164 5165 Level: developer 5166 5167 @*/ 5168 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5169 { 5170 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5171 PetscErrorCode ierr; 5172 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5173 IS isrowb,iscolb; 5174 Mat *bseq=NULL; 5175 5176 PetscFunctionBegin; 5177 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5178 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5179 } 5180 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5181 5182 if (scall == MAT_INITIAL_MATRIX) { 5183 start = A->cmap->rstart; 5184 cmap = a->garray; 5185 nzA = a->A->cmap->n; 5186 nzB = a->B->cmap->n; 5187 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5188 ncols = 0; 5189 for (i=0; i<nzB; i++) { /* row < local row index */ 5190 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5191 else break; 5192 } 5193 imark = i; 5194 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5195 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5196 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5197 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5198 } else { 5199 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5200 isrowb = *rowb; iscolb = *colb; 5201 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5202 bseq[0] = *B_seq; 5203 } 5204 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5205 *B_seq = bseq[0]; 5206 ierr = PetscFree(bseq);CHKERRQ(ierr); 5207 if (!rowb) { 5208 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5209 } else { 5210 *rowb = isrowb; 5211 } 5212 if (!colb) { 5213 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5214 } else { 5215 *colb = iscolb; 5216 } 5217 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5218 PetscFunctionReturn(0); 5219 } 5220 5221 /* 5222 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5223 of the OFF-DIAGONAL portion of local A 5224 5225 Collective on Mat 5226 5227 Input Parameters: 5228 + A,B - the matrices in mpiaij format 5229 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5230 5231 Output Parameter: 5232 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5233 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5234 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5235 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5236 5237 Level: developer 5238 5239 */ 5240 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5241 { 5242 VecScatter_MPI_General *gen_to,*gen_from; 5243 PetscErrorCode ierr; 5244 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5245 Mat_SeqAIJ *b_oth; 5246 VecScatter ctx; 5247 MPI_Comm comm; 5248 PetscMPIInt *rprocs,*sprocs,tag,rank; 5249 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5250 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5251 PetscScalar *b_otha,*bufa,*bufA,*vals; 5252 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5253 MPI_Request *rwaits = NULL,*swaits = NULL; 5254 MPI_Status *sstatus,rstatus; 5255 PetscMPIInt jj,size; 5256 VecScatterType type; 5257 PetscBool mpi1; 5258 5259 PetscFunctionBegin; 5260 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5261 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5262 5263 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5264 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5265 } 5266 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5267 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5268 5269 if (size == 1) { 5270 startsj_s = NULL; 5271 bufa_ptr = NULL; 5272 *B_oth = NULL; 5273 PetscFunctionReturn(0); 5274 } 5275 5276 ctx = a->Mvctx; 5277 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5278 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5279 if (!mpi1) { 5280 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5281 thus create a->Mvctx_mpi1 */ 5282 if (!a->Mvctx_mpi1) { 5283 a->Mvctx_mpi1_flg = PETSC_TRUE; 5284 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5285 } 5286 ctx = a->Mvctx_mpi1; 5287 } 5288 tag = ((PetscObject)ctx)->tag; 5289 5290 gen_to = (VecScatter_MPI_General*)ctx->todata; 5291 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5292 nrecvs = gen_from->n; 5293 nsends = gen_to->n; 5294 5295 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5296 srow = gen_to->indices; /* local row index to be sent */ 5297 sstarts = gen_to->starts; 5298 sprocs = gen_to->procs; 5299 sstatus = gen_to->sstatus; 5300 sbs = gen_to->bs; 5301 rstarts = gen_from->starts; 5302 rprocs = gen_from->procs; 5303 rbs = gen_from->bs; 5304 5305 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5306 if (scall == MAT_INITIAL_MATRIX) { 5307 /* i-array */ 5308 /*---------*/ 5309 /* post receives */ 5310 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5311 for (i=0; i<nrecvs; i++) { 5312 rowlen = rvalues + rstarts[i]*rbs; 5313 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5314 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5315 } 5316 5317 /* pack the outgoing message */ 5318 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5319 5320 sstartsj[0] = 0; 5321 rstartsj[0] = 0; 5322 len = 0; /* total length of j or a array to be sent */ 5323 k = 0; 5324 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5325 for (i=0; i<nsends; i++) { 5326 rowlen = svalues + sstarts[i]*sbs; 5327 
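/* rowlen[] will hold, for each (block) row sent to sprocs[i], the number of nonzero columns in that row of B */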
nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5328 for (j=0; j<nrows; j++) { 5329 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5330 for (l=0; l<sbs; l++) { 5331 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5332 5333 rowlen[j*sbs+l] = ncols; 5334 5335 len += ncols; 5336 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5337 } 5338 k++; 5339 } 5340 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5341 5342 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5343 } 5344 /* recvs and sends of i-array are completed */ 5345 i = nrecvs; 5346 while (i--) { 5347 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5348 } 5349 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5350 ierr = PetscFree(svalues);CHKERRQ(ierr); 5351 5352 /* allocate buffers for sending j and a arrays */ 5353 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5354 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5355 5356 /* create i-array of B_oth */ 5357 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5358 5359 b_othi[0] = 0; 5360 len = 0; /* total length of j or a array to be received */ 5361 k = 0; 5362 for (i=0; i<nrecvs; i++) { 5363 rowlen = rvalues + rstarts[i]*rbs; 5364 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5365 for (j=0; j<nrows; j++) { 5366 b_othi[k+1] = b_othi[k] + rowlen[j]; 5367 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5368 k++; 5369 } 5370 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5371 } 5372 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5373 5374 /* allocate space for j and a arrrays of B_oth */ 5375 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5376 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5377 5378 /* j-array */ 5379 /*---------*/ 5380 /* post receives of j-array */ 5381 for (i=0; i<nrecvs; i++) { 5382 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5383 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5384 } 5385 5386 /* pack the outgoing message j-array */ 5387 k = 0; 5388 for (i=0; i<nsends; i++) { 5389 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5390 bufJ = bufj+sstartsj[i]; 5391 for (j=0; j<nrows; j++) { 5392 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5393 for (ll=0; ll<sbs; ll++) { 5394 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5395 for (l=0; l<ncols; l++) { 5396 *bufJ++ = cols[l]; 5397 } 5398 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5399 } 5400 } 5401 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5402 } 5403 5404 /* recvs and sends of j-array are completed */ 5405 i = nrecvs; 5406 while (i--) { 5407 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5408 } 5409 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5410 } else if (scall == MAT_REUSE_MATRIX) { 5411 sstartsj = *startsj_s; 5412 rstartsj = *startsj_r; 5413 bufa = *bufa_ptr; 5414 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5415 b_otha = b_oth->a; 5416 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5417 5418 /* a-array */ 5419 /*---------*/ 5420 /* post receives of a-array */ 5421 for (i=0; i<nrecvs; i++) { 5422 nrows = rstartsj[i+1]-rstartsj[i]; /* length 
of the msg received */ 5423 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5424 } 5425 5426 /* pack the outgoing message a-array */ 5427 k = 0; 5428 for (i=0; i<nsends; i++) { 5429 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5430 bufA = bufa+sstartsj[i]; 5431 for (j=0; j<nrows; j++) { 5432 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5433 for (ll=0; ll<sbs; ll++) { 5434 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5435 for (l=0; l<ncols; l++) { 5436 *bufA++ = vals[l]; 5437 } 5438 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5439 } 5440 } 5441 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5442 } 5443 /* recvs and sends of a-array are completed */ 5444 i = nrecvs; 5445 while (i--) { 5446 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5447 } 5448 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5449 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5450 5451 if (scall == MAT_INITIAL_MATRIX) { 5452 /* put together the new matrix */ 5453 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5454 5455 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5456 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5457 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5458 b_oth->free_a = PETSC_TRUE; 5459 b_oth->free_ij = PETSC_TRUE; 5460 b_oth->nonew = 0; 5461 5462 ierr = PetscFree(bufj);CHKERRQ(ierr); 5463 if (!startsj_s || !bufa_ptr) { 5464 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5465 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5466 } else { 5467 *startsj_s = sstartsj; 5468 *startsj_r = rstartsj; 5469 *bufa_ptr = bufa; 5470 } 5471 } 5472 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5473 PetscFunctionReturn(0); 5474 } 5475 5476 /*@C 5477 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5478 5479 Not Collective 5480 5481 Input Parameters: 5482 . A - The matrix in mpiaij format 5483 5484 Output Parameter: 5485 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5486 . 
colmap - A map from global column index to local index into lvec 5487 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5488 5489 Level: developer 5490 5491 @*/ 5492 #if defined(PETSC_USE_CTABLE) 5493 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5494 #else 5495 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5496 #endif 5497 { 5498 Mat_MPIAIJ *a; 5499 5500 PetscFunctionBegin; 5501 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5502 PetscValidPointer(lvec, 2); 5503 PetscValidPointer(colmap, 3); 5504 PetscValidPointer(multScatter, 4); 5505 a = (Mat_MPIAIJ*) A->data; 5506 if (lvec) *lvec = a->lvec; 5507 if (colmap) *colmap = a->colmap; 5508 if (multScatter) *multScatter = a->Mvctx; 5509 PetscFunctionReturn(0); 5510 } 5511 5512 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5513 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5514 #if defined(PETSC_HAVE_MKL_SPARSE) 5515 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5516 #endif 5517 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5518 #if defined(PETSC_HAVE_ELEMENTAL) 5519 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5520 #endif 5521 #if defined(PETSC_HAVE_HYPRE) 5522 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5523 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5524 #endif 5525 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5526 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5527 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5528 5529 /* 5530 Computes (B'*A')' since computing B*A directly is untenable 5531 5532 n p p 5533 ( ) ( ) ( ) 5534 m ( A ) * n ( B ) = m ( C ) 5535 ( ) ( ) ( ) 5536 5537 */ 5538 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5539 { 5540 PetscErrorCode ierr; 5541 Mat At,Bt,Ct; 5542 5543 PetscFunctionBegin; 5544 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5545 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5546 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5547 ierr = MatDestroy(&At);CHKERRQ(ierr); 5548 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5549 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5550 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5551 PetscFunctionReturn(0); 5552 } 5553 5554 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5555 { 5556 PetscErrorCode ierr; 5557 PetscInt m=A->rmap->n,n=B->cmap->n; 5558 Mat Cmat; 5559 5560 PetscFunctionBegin; 5561 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5562 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5563 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5564 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5565 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5566 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5567 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5568 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5569 5570 Cmat->ops->matmultnumeric = 
MatMatMultNumeric_MPIDense_MPIAIJ; 5571 5572 *C = Cmat; 5573 PetscFunctionReturn(0); 5574 } 5575 5576 /* ----------------------------------------------------------------*/ 5577 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5578 { 5579 PetscErrorCode ierr; 5580 5581 PetscFunctionBegin; 5582 if (scall == MAT_INITIAL_MATRIX) { 5583 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5584 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5585 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5586 } 5587 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5588 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5589 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5590 PetscFunctionReturn(0); 5591 } 5592 5593 /*MC 5594 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5595 5596 Options Database Keys: 5597 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5598 5599 Level: beginner 5600 5601 .seealso: MatCreateAIJ() 5602 M*/ 5603 5604 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5605 { 5606 Mat_MPIAIJ *b; 5607 PetscErrorCode ierr; 5608 PetscMPIInt size; 5609 5610 PetscFunctionBegin; 5611 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5612 5613 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5614 B->data = (void*)b; 5615 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5616 B->assembled = PETSC_FALSE; 5617 B->insertmode = NOT_SET_VALUES; 5618 b->size = size; 5619 5620 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5621 5622 /* build cache for off array entries formed */ 5623 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5624 5625 b->donotstash = PETSC_FALSE; 5626 b->colmap = 0; 5627 b->garray = 0; 5628 b->roworiented = PETSC_TRUE; 5629 5630 /* stuff used for matrix vector multiply */ 5631 b->lvec = NULL; 5632 b->Mvctx = NULL; 5633 5634 /* stuff for MatGetRow() */ 5635 b->rowindices = 0; 5636 b->rowvalues = 0; 5637 b->getrowactive = PETSC_FALSE; 5638 5639 /* flexible pointer used in CUSP/CUSPARSE classes */ 5640 b->spptr = NULL; 5641 5642 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5643 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5644 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5645 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5646 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5647 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5648 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5649 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5650 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5651 #if defined(PETSC_HAVE_MKL_SPARSE) 5652 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5653 #endif 5654 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5655 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5656 #if defined(PETSC_HAVE_ELEMENTAL) 5657 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5658 #endif 5659 #if defined(PETSC_HAVE_HYPRE) 5660 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5661 #endif 5662 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5664 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5665 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5667 #if defined(PETSC_HAVE_HYPRE) 5668 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5669 #endif 5670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5671 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5672 PetscFunctionReturn(0); 5673 } 5674 5675 /*@C 5676 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5677 and "off-diagonal" part of the matrix in CSR format. 5678 5679 Collective on MPI_Comm 5680 5681 Input Parameters: 5682 + comm - MPI communicator 5683 . m - number of local rows (Cannot be PETSC_DECIDE) 5684 . n - This value should be the same as the local size used in creating the 5685 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5686 calculated if N is given) For square matrices n is almost always m. 5687 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5688 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5689 . i - row indices for "diagonal" portion of matrix 5690 . j - column indices 5691 . a - matrix values 5692 . oi - row indices for "off-diagonal" portion of matrix 5693 . oj - column indices 5694 - oa - matrix values 5695 5696 Output Parameter: 5697 . mat - the matrix 5698 5699 Level: advanced 5700 5701 Notes: 5702 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5703 must free the arrays once the matrix has been destroyed and not before. 5704 5705 The i and j indices are 0 based 5706 5707 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5708 5709 This sets local rows and cannot be used to set off-processor values. 5710 5711 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5712 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5713 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 5714 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5715 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5716 communication if it is known that only local entries will be set. 5717 5718 .keywords: matrix, aij, compressed row, sparse, parallel 5719 5720 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5721 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5722 @*/ 5723 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5724 { 5725 PetscErrorCode ierr; 5726 Mat_MPIAIJ *maij; 5727 5728 PetscFunctionBegin; 5729 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5730 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5731 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5732 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5733 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5734 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5735 maij = (Mat_MPIAIJ*) (*mat)->data; 5736 5737 (*mat)->preallocated = PETSC_TRUE; 5738 5739 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5740 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5741 5742 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5743 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5744 5745 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5746 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5747 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5748 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5749 5750 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5751 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5752 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5753 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5754 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5755 PetscFunctionReturn(0); 5756 } 5757 5758 /* 5759 Special version for direct calls from Fortran 5760 */ 5761 #include <petsc/private/fortranimpl.h> 5762 5763 /* Change these macros so can be used in void function */ 5764 #undef CHKERRQ 5765 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5766 #undef SETERRQ2 5767 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5768 #undef SETERRQ3 5769 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5770 #undef SETERRQ 5771 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5772 5773 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5774 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5775 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5776 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5777 #else 5778 #endif 5779 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5780 { 5781 Mat mat = *mmat; 5782 PetscInt m = *mm, n = *mn; 5783 InsertMode addv = 
                    *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) { /* row is owned by this process */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) { /* column owned locally: insert into the diagonal block A with a local column index */
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else { /* column owned by another process: insert into the off-diagonal block B */
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
              /* translate the global column index into B's compacted local column index */
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) { /* row owned by another process: stash the values for exchange during assembly */
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
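/*
    Illustrative sketch (not part of the PETSc API, and not compiled): the MatSetValues()-based assembly
    recommended in the notes of MatCreateMPIAIJWithSplitArrays() above. The function name, the preallocation
    counts, and the entries being set are hypothetical placeholders; only the call sequence is the point.
*/
#if 0
static PetscErrorCode ExampleAssembleWithMatSetValues(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,Mat *mat)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,row,col;
  PetscScalar    value;

  PetscFunctionBegin;
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* placeholder preallocation: at most 3 nonzeros per row in each of the diagonal and off-diagonal blocks */
  ierr = MatMPIAIJSetPreallocation(*mat,3,NULL,3,NULL);CHKERRQ(ierr);
  /* only locally owned rows are set below, so all stash communication can be skipped */
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*mat,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    col   = row;   /* placeholder: a single diagonal entry per row */
    value = 1.0;
    ierr  = MatSetValues(*mat,1,&row,1,&col,&value,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif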