#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type also
    automatically switches over to use inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
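/*
   Illustrative usage sketch (not part of this file's implementation): following the recommendation
   in the MATAIJ notes above, create a matrix with MatSetType(mat,MATAIJ) and call BOTH preallocation
   routines so the same code runs on one process or many. The local sizes and the nonzero estimates
   (5 per diagonal-block row, 2 per off-diagonal-block row) are made-up example values.

     Mat      A;
     PetscInt m = 10,n = 10;                               // local rows and columns (example values)

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);            // MATSEQAIJ on one process, MATMPIAIJ otherwise
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);        // used on a one-process communicator
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr); // used on a multi-process communicator
*/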
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array, although it is fast to access).
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
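/*
   Illustrative sketch (assuming a colmap already built by MatCreateColmap_MPIAIJ_Private() above):
   translating a global column number gcol into the local column index lcol of the off-diagonal
   block B. Entries are stored shifted by one, so a value of -1 after the shift means gcol does not
   occur in B on this process. This mirrors the lookups performed in MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below; gcol and lcol are hypothetical variable names used only here.

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                       // table stores lcol+1 so that 0 can mean "not found"
   #else
     lcol = aij->colmap[gcol] - 1; // dense array of length mat->cmap->N, 0 means "not found"
   #endif
*/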
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value;   \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value;  \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol2)  low2 = 0;     \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A     = aij->A;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa   = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B     = aij->B;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba   = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscInt *dnz, const PetscInt *onz)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A    = aij->A; /* diagonal part of the matrix */
  Mat            B    = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *b   = (Mat_SeqAIJ*)B->data;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt       *ailen = a->ilen,*aj = a->j;
  PetscInt       *bilen = b->ilen,*bj = b->j;
  PetscInt       am = aij->A->rmap->n,i;
  PetscInt       col, diag_so_far = 0, offd_so_far = 0,j,*first_diag_elem;
  PetscErrorCode ierr;

  PetscFunctionBegin;

  /* Allocate memory.
     first_diag_elem[j] records which entry of row j in mat_j holds the first diagonal-block element */
  ierr = PetscMalloc1(am+1, &first_diag_elem);CHKERRQ(ierr);

  /* Find first index of mat_j which is in the diagonal and store it in first_diag_elem */
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    /* Iterate over all columns, until a diagonal element is found */
    for (i=0; i<dnz[j]+onz[j]; i++) {
      col = i + mat_i[j];
      /* If current element is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        first_diag_elem[j] = i;
        break;
      }
    }
  }

  /* Set the off-diagonal elements */
  for (j=0; j<am; j++) {
    /* left off-diagonal */
    for (i=0; i<onz[j] && i<first_diag_elem[j]; i++) {
      col = i + mat_i[j];
      bj[i+offd_so_far] = mat_j[col];
    }
    /* right off-diagonal */
    for (i=i+dnz[j]; i<dnz[j]+onz[j]; i++) {
      col = i + mat_i[j];
      bj[i-dnz[j]+offd_so_far] = mat_j[col];
    }
    bilen[j]     = onz[j];
    offd_so_far += onz[j];
  }

  /* Set the diagonal elements */
  for (j=0; j<am; j++) {
    for (i=0; i<dnz[j]; i++) {
      col = i + mat_i[j];
      aj[diag_so_far + i] = mat_j[col+first_diag_elem[j]] - cstart;
      ailen[j] = dnz[j];
    }
    diag_so_far += dnz[j];
  }
  ierr = PetscFree(first_diag_elem);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       *lrows;
  PetscInt       r, len;
  PetscBool      cong;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if ((diag != 0.0) && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs space as large as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
(j=B->i[i]; j<B->i[i+1]; j++) { 1310 if (garray[B->j[j]] > cstart) break; 1311 column_values[cnt++] = B->a[j]; 1312 } 1313 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1314 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1315 } 1316 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1317 1318 /* store the column values to the file */ 1319 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1320 if (!rank) { 1321 MPI_Status status; 1322 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1323 for (i=1; i<size; i++) { 1324 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1325 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1326 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1327 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1328 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1329 } 1330 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1331 } else { 1332 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1333 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1334 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1335 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1336 } 1337 ierr = PetscFree(column_values);CHKERRQ(ierr); 1338 1339 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1340 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1341 PetscFunctionReturn(0); 1342 } 1343 1344 #include <petscdraw.h> 1345 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1346 { 1347 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1348 PetscErrorCode ierr; 1349 PetscMPIInt rank = aij->rank,size = aij->size; 1350 PetscBool isdraw,iascii,isbinary; 1351 PetscViewer sviewer; 1352 PetscViewerFormat format; 1353 1354 PetscFunctionBegin; 1355 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1356 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1357 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1358 if (iascii) { 1359 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1360 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1361 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1362 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1363 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1364 for (i=0; i<(PetscInt)size; i++) { 1365 nmax = PetscMax(nmax,nz[i]); 1366 nmin = PetscMin(nmin,nz[i]); 1367 navg += nz[i]; 1368 } 1369 ierr = PetscFree(nz);CHKERRQ(ierr); 1370 navg = navg/size; 1371 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1375 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1376 MatInfo info; 1377 
PetscBool inodes; 1378 1379 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1380 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1381 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1382 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1383 if (!inodes) { 1384 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1385 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1386 } else { 1387 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1388 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1389 } 1390 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1391 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1392 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1393 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1394 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1395 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1396 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1397 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1400 PetscInt inodecount,inodelimit,*inodes; 1401 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1402 if (inodes) { 1403 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1404 } else { 1405 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1406 } 1407 PetscFunctionReturn(0); 1408 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1409 PetscFunctionReturn(0); 1410 } 1411 } else if (isbinary) { 1412 if (size == 1) { 1413 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1414 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1415 } else { 1416 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1417 } 1418 PetscFunctionReturn(0); 1419 } else if (isdraw) { 1420 PetscDraw draw; 1421 PetscBool isnull; 1422 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1423 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1424 if (isnull) PetscFunctionReturn(0); 1425 } 1426 1427 { 1428 /* assemble the entire matrix onto first processor. 
*/ 1429 Mat A; 1430 Mat_SeqAIJ *Aloc; 1431 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1432 MatScalar *a; 1433 1434 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1435 if (!rank) { 1436 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1437 } else { 1438 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1439 } 1440 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1441 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1442 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1443 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1444 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1445 1446 /* copy over the A part */ 1447 Aloc = (Mat_SeqAIJ*)aij->A->data; 1448 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1449 row = mat->rmap->rstart; 1450 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1451 for (i=0; i<m; i++) { 1452 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1453 row++; 1454 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1455 } 1456 aj = Aloc->j; 1457 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1458 1459 /* copy over the B part */ 1460 Aloc = (Mat_SeqAIJ*)aij->B->data; 1461 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1462 row = mat->rmap->rstart; 1463 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1464 ct = cols; 1465 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1466 for (i=0; i<m; i++) { 1467 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1468 row++; 1469 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1470 } 1471 ierr = PetscFree(ct);CHKERRQ(ierr); 1472 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1473 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1474 /* 1475 Everyone has to call to draw the matrix since the graphics waits are 1476 synchronized across all processors that share the PetscDraw object 1477 */ 1478 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1479 if (!rank) { 1480 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1481 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1482 } 1483 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1484 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1485 ierr = MatDestroy(&A);CHKERRQ(ierr); 1486 } 1487 PetscFunctionReturn(0); 1488 } 1489 1490 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1491 { 1492 PetscErrorCode ierr; 1493 PetscBool iascii,isdraw,issocket,isbinary; 1494 1495 PetscFunctionBegin; 1496 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1497 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1498 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1499 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1500 if (iascii || isdraw || isbinary || issocket) { 1501 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1502 } 1503 PetscFunctionReturn(0); 1504 } 1505 1506 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1507 { 1508 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1509 PetscErrorCode ierr; 1510 Vec bb1 = 0; 1511 PetscBool hasop; 1512 
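  /*
     The parallel sweeps below are local (block Jacobi) sweeps: the ghost values of xx are scattered into mat->lvec,
     the right-hand side is updated as bb1 = bb - B*lvec using the off-diagonal block B, and the SOR sweep itself is
     applied to the local diagonal block A. A true global SOR sweep across processes is not supported (see the final SETERRQ below).
  */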
1513 PetscFunctionBegin; 1514 if (flag == SOR_APPLY_UPPER) { 1515 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1516 PetscFunctionReturn(0); 1517 } 1518 1519 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1520 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1521 } 1522 1523 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1524 if (flag & SOR_ZERO_INITIAL_GUESS) { 1525 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1526 its--; 1527 } 1528 1529 while (its--) { 1530 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1531 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1532 1533 /* update rhs: bb1 = bb - B*x */ 1534 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1535 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1536 1537 /* local sweep */ 1538 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1539 } 1540 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1541 if (flag & SOR_ZERO_INITIAL_GUESS) { 1542 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1543 its--; 1544 } 1545 while (its--) { 1546 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1547 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1548 1549 /* update rhs: bb1 = bb - B*x */ 1550 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1551 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1552 1553 /* local sweep */ 1554 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1555 } 1556 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1557 if (flag & SOR_ZERO_INITIAL_GUESS) { 1558 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1559 its--; 1560 } 1561 while (its--) { 1562 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1563 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1564 1565 /* update rhs: bb1 = bb - B*x */ 1566 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1567 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1568 1569 /* local sweep */ 1570 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1571 } 1572 } else if (flag & SOR_EISENSTAT) { 1573 Vec xx1; 1574 1575 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1576 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1577 1578 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1579 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1580 if (!mat->diag) { 1581 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1582 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1583 } 1584 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1585 if (hasop) { 1586 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1587 } else { 1588 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1589 } 1590 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1591 1592 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1593 1594 /* local sweep */ 1595 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1596 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1597 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1598 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1599 1600 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1601 1602 matin->factorerrortype = mat->A->factorerrortype; 1603 PetscFunctionReturn(0); 1604 } 1605 1606 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1607 { 1608 Mat aA,aB,Aperm; 1609 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1610 PetscScalar *aa,*ba; 1611 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1612 PetscSF rowsf,sf; 1613 IS parcolp = NULL; 1614 PetscBool done; 1615 PetscErrorCode ierr; 1616 1617 PetscFunctionBegin; 1618 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1619 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1620 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1621 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1622 1623 /* Invert row permutation to find out where my rows should go */ 1624 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1625 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1626 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1627 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1628 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1629 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1630 1631 /* Invert column permutation to find out where my columns should go */ 1632 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1633 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1634 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1635 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1636 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1637 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1638 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1639 1640 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1641 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1642 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1643 1644 /* Find out where my gcols should go */ 1645 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1646 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1647 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1648 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1649 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1650 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1651 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1652 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1653 1654 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1655 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1656 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1657 for (i=0; i<m; i++) { 1658 PetscInt row = rdest[i],rowner; 1659 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1660 for (j=ai[i]; j<ai[i+1]; j++) { 1661 PetscInt cowner,col = cdest[aj[j]]; 1662 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to 
eliminate this search */ 1663 if (rowner == cowner) dnnz[i]++; 1664 else onnz[i]++; 1665 } 1666 for (j=bi[i]; j<bi[i+1]; j++) { 1667 PetscInt cowner,col = gcdest[bj[j]]; 1668 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1669 if (rowner == cowner) dnnz[i]++; 1670 else onnz[i]++; 1671 } 1672 } 1673 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1674 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1675 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1676 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1677 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1678 1679 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1680 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1681 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1682 for (i=0; i<m; i++) { 1683 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1684 PetscInt j0,rowlen; 1685 rowlen = ai[i+1] - ai[i]; 1686 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1687 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1688 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1689 } 1690 rowlen = bi[i+1] - bi[i]; 1691 for (j0=j=0; j<rowlen; j0=j) { 1692 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1693 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1694 } 1695 } 1696 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1697 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1698 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1699 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1700 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1701 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1702 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1703 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1704 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1705 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1706 *B = Aperm; 1707 PetscFunctionReturn(0); 1708 } 1709 1710 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1711 { 1712 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1717 if (ghosts) *ghosts = aij->garray; 1718 PetscFunctionReturn(0); 1719 } 1720 1721 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1722 { 1723 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1724 Mat A = mat->A,B = mat->B; 1725 PetscErrorCode ierr; 1726 PetscReal isend[5],irecv[5]; 1727 1728 PetscFunctionBegin; 1729 info->block_size = 1.0; 1730 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1731 1732 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1733 isend[3] = info->memory; isend[4] = info->mallocs; 1734 1735 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1736 1737 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1738 isend[3] += info->memory; isend[4] += info->mallocs; 1739 if (flag == MAT_LOCAL) { 1740 info->nz_used = isend[0]; 1741 info->nz_allocated = isend[1]; 1742 info->nz_unneeded = isend[2]; 1743 info->memory = isend[3]; 1744 info->mallocs = 
isend[4]; 1745 } else if (flag == MAT_GLOBAL_MAX) { 1746 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1747 1748 info->nz_used = irecv[0]; 1749 info->nz_allocated = irecv[1]; 1750 info->nz_unneeded = irecv[2]; 1751 info->memory = irecv[3]; 1752 info->mallocs = irecv[4]; 1753 } else if (flag == MAT_GLOBAL_SUM) { 1754 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1755 1756 info->nz_used = irecv[0]; 1757 info->nz_allocated = irecv[1]; 1758 info->nz_unneeded = irecv[2]; 1759 info->memory = irecv[3]; 1760 info->mallocs = irecv[4]; 1761 } 1762 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1763 info->fill_ratio_needed = 0; 1764 info->factor_mallocs = 0; 1765 PetscFunctionReturn(0); 1766 } 1767 1768 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1769 { 1770 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1771 PetscErrorCode ierr; 1772 1773 PetscFunctionBegin; 1774 switch (op) { 1775 case MAT_NEW_NONZERO_LOCATIONS: 1776 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1777 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1778 case MAT_KEEP_NONZERO_PATTERN: 1779 case MAT_NEW_NONZERO_LOCATION_ERR: 1780 case MAT_USE_INODES: 1781 case MAT_IGNORE_ZERO_ENTRIES: 1782 MatCheckPreallocated(A,1); 1783 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1784 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1785 break; 1786 case MAT_ROW_ORIENTED: 1787 MatCheckPreallocated(A,1); 1788 a->roworiented = flg; 1789 1790 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1791 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1792 break; 1793 case MAT_NEW_DIAGONALS: 1794 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1795 break; 1796 case MAT_IGNORE_OFF_PROC_ENTRIES: 1797 a->donotstash = flg; 1798 break; 1799 case MAT_SPD: 1800 A->spd_set = PETSC_TRUE; 1801 A->spd = flg; 1802 if (flg) { 1803 A->symmetric = PETSC_TRUE; 1804 A->structurally_symmetric = PETSC_TRUE; 1805 A->symmetric_set = PETSC_TRUE; 1806 A->structurally_symmetric_set = PETSC_TRUE; 1807 } 1808 break; 1809 case MAT_SYMMETRIC: 1810 MatCheckPreallocated(A,1); 1811 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1812 break; 1813 case MAT_STRUCTURALLY_SYMMETRIC: 1814 MatCheckPreallocated(A,1); 1815 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1816 break; 1817 case MAT_HERMITIAN: 1818 MatCheckPreallocated(A,1); 1819 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1820 break; 1821 case MAT_SYMMETRY_ETERNAL: 1822 MatCheckPreallocated(A,1); 1823 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1824 break; 1825 case MAT_SUBMAT_SINGLEIS: 1826 A->submat_singleis = flg; 1827 break; 1828 case MAT_STRUCTURE_ONLY: 1829 /* The option is handled directly by MatSetOption() */ 1830 break; 1831 default: 1832 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1833 } 1834 PetscFunctionReturn(0); 1835 } 1836 1837 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1838 { 1839 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1840 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1841 PetscErrorCode ierr; 1842 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1843 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1844 PetscInt *cmap,*idx_p; 1845 1846 PetscFunctionBegin; 1847 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1848 mat->getrowactive = PETSC_TRUE; 1849 1850 if (!mat->rowvalues && 
(idx || v)) { 1851 /* 1852 allocate enough space to hold information from the longest row. 1853 */ 1854 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1855 PetscInt max = 1,tmp; 1856 for (i=0; i<matin->rmap->n; i++) { 1857 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1858 if (max < tmp) max = tmp; 1859 } 1860 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1861 } 1862 1863 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1864 lrow = row - rstart; 1865 1866 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1867 if (!v) {pvA = 0; pvB = 0;} 1868 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1869 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1870 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1871 nztot = nzA + nzB; 1872 1873 cmap = mat->garray; 1874 if (v || idx) { 1875 if (nztot) { 1876 /* Sort by increasing column numbers, assuming A and B already sorted */ 1877 PetscInt imark = -1; 1878 if (v) { 1879 *v = v_p = mat->rowvalues; 1880 for (i=0; i<nzB; i++) { 1881 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1882 else break; 1883 } 1884 imark = i; 1885 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1886 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1887 } 1888 if (idx) { 1889 *idx = idx_p = mat->rowindices; 1890 if (imark > -1) { 1891 for (i=0; i<imark; i++) { 1892 idx_p[i] = cmap[cworkB[i]]; 1893 } 1894 } else { 1895 for (i=0; i<nzB; i++) { 1896 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1897 else break; 1898 } 1899 imark = i; 1900 } 1901 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1902 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1903 } 1904 } else { 1905 if (idx) *idx = 0; 1906 if (v) *v = 0; 1907 } 1908 } 1909 *nz = nztot; 1910 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1911 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1912 PetscFunctionReturn(0); 1913 } 1914 1915 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1916 { 1917 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1918 1919 PetscFunctionBegin; 1920 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1921 aij->getrowactive = PETSC_FALSE; 1922 PetscFunctionReturn(0); 1923 } 1924 1925 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1926 { 1927 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1928 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1929 PetscErrorCode ierr; 1930 PetscInt i,j,cstart = mat->cmap->rstart; 1931 PetscReal sum = 0.0; 1932 MatScalar *v; 1933 1934 PetscFunctionBegin; 1935 if (aij->size == 1) { 1936 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1937 } else { 1938 if (type == NORM_FROBENIUS) { 1939 v = amat->a; 1940 for (i=0; i<amat->nz; i++) { 1941 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1942 } 1943 v = bmat->a; 1944 for (i=0; i<bmat->nz; i++) { 1945 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1946 } 1947 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1948 *norm = PetscSqrtReal(*norm); 1949 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1950 } else if (type == NORM_1) { /* max column norm */ 1951 PetscReal *tmp,*tmp2; 1952 PetscInt *jj,*garray = aij->garray; 1953 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1954 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1955 *norm = 0.0; 1956 v = amat->a; jj = amat->j; 1957 for (j=0; j<amat->nz; j++) { 1958 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1959 } 1960 v = bmat->a; jj = bmat->j; 1961 for (j=0; j<bmat->nz; j++) { 1962 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1963 } 1964 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1965 for (j=0; j<mat->cmap->N; j++) { 1966 if (tmp2[j] > *norm) *norm = tmp2[j]; 1967 } 1968 ierr = PetscFree(tmp);CHKERRQ(ierr); 1969 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1970 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1971 } else if (type == NORM_INFINITY) { /* max row norm */ 1972 PetscReal ntemp = 0.0; 1973 for (j=0; j<aij->A->rmap->n; j++) { 1974 v = amat->a + amat->i[j]; 1975 sum = 0.0; 1976 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1977 sum += PetscAbsScalar(*v); v++; 1978 } 1979 v = bmat->a + bmat->i[j]; 1980 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1981 sum += PetscAbsScalar(*v); v++; 1982 } 1983 if (sum > ntemp) ntemp = sum; 1984 } 1985 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1986 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1987 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1988 } 1989 PetscFunctionReturn(0); 1990 } 1991 1992 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1993 { 1994 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1995 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1996 PetscErrorCode ierr; 1997 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1998 PetscInt cstart = A->cmap->rstart,ncol; 1999 Mat B; 2000 MatScalar *array; 2001 2002 PetscFunctionBegin; 2003 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2004 ai = Aloc->i; aj = Aloc->j; 2005 bi = Bloc->i; bj = Bloc->j; 2006 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2007 PetscInt *d_nnz,*g_nnz,*o_nnz; 2008 PetscSFNode *oloc; 2009 PETSC_UNUSED PetscSF sf; 2010 2011 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2012 /* compute d_nnz for preallocation */ 2013 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2014 for (i=0; i<ai[ma]; i++) { 2015 d_nnz[aj[i]]++; 2016 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2017 } 2018 /* compute local off-diagonal contributions */ 2019 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2020 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2021 /* map those to global */ 2022 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2023 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2024 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2025 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2026 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2027 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2028 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2029 2030 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2031 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2032 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2033 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2034 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2035 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2036 } else { 2037 B = *matout; 2038 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2039 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2040 } 2041 2042 /* copy over the A part */ 2043 array = Aloc->a; 2044 row = A->rmap->rstart; 2045 for (i=0; i<ma; i++) { 2046 ncol = ai[i+1]-ai[i]; 2047 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2048 row++; 2049 array += ncol; aj += ncol; 2050 } 2051 aj = Aloc->j; 2052 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2053 2054 /* copy over the B part */ 2055 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2056 array = Bloc->a; 2057 row = A->rmap->rstart; 2058 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2059 cols_tmp = cols; 2060 for (i=0; i<mb; i++) { 2061 ncol = bi[i+1]-bi[i]; 2062 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2063 row++; 2064 array += ncol; cols_tmp += ncol; 2065 } 2066 ierr = PetscFree(cols);CHKERRQ(ierr); 2067 2068 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2069 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2070 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2071 *matout = B; 2072 } else { 2073 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2074 } 2075 PetscFunctionReturn(0); 2076 } 2077 2078 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2079 { 2080 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2081 Mat a = aij->A,b = aij->B; 2082 PetscErrorCode ierr; 2083 PetscInt s1,s2,s3; 2084 2085 PetscFunctionBegin; 2086 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2087 if (rr) { 2088 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2089 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2090 /* Overlap communication with computation. 
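         The scatter of rr into aij->lvec is begun here and completed only after the diagonal block has been scaled,
         so the off-diagonal block can then be right-scaled with the ghosted values.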
*/ 2091 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2092 } 2093 if (ll) { 2094 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2095 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2096 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2097 } 2098 /* scale the diagonal block */ 2099 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2100 2101 if (rr) { 2102 /* Do a scatter end and then right scale the off-diagonal block */ 2103 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2104 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2105 } 2106 PetscFunctionReturn(0); 2107 } 2108 2109 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2110 { 2111 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2112 PetscErrorCode ierr; 2113 2114 PetscFunctionBegin; 2115 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2116 PetscFunctionReturn(0); 2117 } 2118 2119 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2120 { 2121 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2122 Mat a,b,c,d; 2123 PetscBool flg; 2124 PetscErrorCode ierr; 2125 2126 PetscFunctionBegin; 2127 a = matA->A; b = matA->B; 2128 c = matB->A; d = matB->B; 2129 2130 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2131 if (flg) { 2132 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2133 } 2134 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2135 PetscFunctionReturn(0); 2136 } 2137 2138 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2139 { 2140 PetscErrorCode ierr; 2141 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2142 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2143 2144 PetscFunctionBegin; 2145 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2146 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2147 /* because of the column compression in the off-processor part of the matrix a->B, 2148 the number of columns in a->B and b->B may be different, hence we cannot call 2149 the MatCopy() directly on the two parts. If need be, we can provide a more 2150 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2151 then copying the submatrices */ 2152 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2153 } else { 2154 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2155 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2156 } 2157 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2158 PetscFunctionReturn(0); 2159 } 2160 2161 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2162 { 2163 PetscErrorCode ierr; 2164 2165 PetscFunctionBegin; 2166 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 /* 2171 Computes the number of nonzeros per row needed for preallocation when X and Y 2172 have different nonzero structure. 
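  For each row the count is the size of the union of the (sorted) column indices of X and Y, obtained below by
  merging the two index lists while skipping duplicates.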
2173 */ 2174 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2175 { 2176 PetscInt i,j,k,nzx,nzy; 2177 2178 PetscFunctionBegin; 2179 /* Set the number of nonzeros in the new matrix */ 2180 for (i=0; i<m; i++) { 2181 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2182 nzx = xi[i+1] - xi[i]; 2183 nzy = yi[i+1] - yi[i]; 2184 nnz[i] = 0; 2185 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2186 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2187 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2188 nnz[i]++; 2189 } 2190 for (; k<nzy; k++) nnz[i]++; 2191 } 2192 PetscFunctionReturn(0); 2193 } 2194 2195 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2196 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2197 { 2198 PetscErrorCode ierr; 2199 PetscInt m = Y->rmap->N; 2200 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2201 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2202 2203 PetscFunctionBegin; 2204 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2205 PetscFunctionReturn(0); 2206 } 2207 2208 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2209 { 2210 PetscErrorCode ierr; 2211 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2212 PetscBLASInt bnz,one=1; 2213 Mat_SeqAIJ *x,*y; 2214 2215 PetscFunctionBegin; 2216 if (str == SAME_NONZERO_PATTERN) { 2217 PetscScalar alpha = a; 2218 x = (Mat_SeqAIJ*)xx->A->data; 2219 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2220 y = (Mat_SeqAIJ*)yy->A->data; 2221 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2222 x = (Mat_SeqAIJ*)xx->B->data; 2223 y = (Mat_SeqAIJ*)yy->B->data; 2224 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2225 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2226 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2227 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2228 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2229 } else { 2230 Mat B; 2231 PetscInt *nnz_d,*nnz_o; 2232 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2233 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2234 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2235 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2236 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2237 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2238 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2239 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2240 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2241 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2242 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2243 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2244 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2245 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2246 } 2247 PetscFunctionReturn(0); 2248 } 2249 2250 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2251 2252 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2253 { 2254 #if defined(PETSC_USE_COMPLEX) 2255 PetscErrorCode ierr; 2256 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2257 2258 PetscFunctionBegin; 2259 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2260 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2261 #else 2262 PetscFunctionBegin; 2263 #endif 2264 PetscFunctionReturn(0); 2265 } 2266 2267 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2268 { 2269 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2270 PetscErrorCode ierr; 2271 2272 PetscFunctionBegin; 2273 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2274 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2275 PetscFunctionReturn(0); 2276 } 2277 2278 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2279 { 2280 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2281 PetscErrorCode ierr; 2282 2283 PetscFunctionBegin; 2284 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2285 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2286 PetscFunctionReturn(0); 2287 } 2288 2289 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2290 { 2291 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2292 PetscErrorCode ierr; 2293 PetscInt i,*idxb = 0; 2294 PetscScalar *va,*vb; 2295 Vec vtmp; 2296 2297 PetscFunctionBegin; 2298 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2299 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2300 if (idx) { 2301 for (i=0; i<A->rmap->n; i++) { 2302 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2303 } 2304 } 2305 2306 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2307 if (idx) { 2308 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2309 } 2310 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2311 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2312 2313 for (i=0; i<A->rmap->n; i++) { 2314 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2315 va[i] = vb[i]; 2316 if (idx) idx[i] = a->garray[idxb[i]]; 2317 } 2318 } 2319 2320 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2321 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2322 ierr = PetscFree(idxb);CHKERRQ(ierr); 2323 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2324 PetscFunctionReturn(0); 2325 } 2326 2327 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2328 { 2329 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2330 PetscErrorCode ierr; 2331 PetscInt i,*idxb = 0; 2332 PetscScalar *va,*vb; 2333 Vec vtmp; 2334 2335 PetscFunctionBegin; 2336 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2337 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2338 if (idx) { 2339 for (i=0; i<A->cmap->n; i++) { 2340 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2341 } 2342 } 2343 2344 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2345 if (idx) { 2346 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2347 } 2348 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2349 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2350 2351 for (i=0; i<A->rmap->n; i++) { 2352 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2353 va[i] = vb[i]; 2354 if (idx) idx[i] = a->garray[idxb[i]]; 2355 } 2356 } 2357 2358 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2359 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2360 ierr = PetscFree(idxb);CHKERRQ(ierr); 2361 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2362 PetscFunctionReturn(0); 2363 } 2364 2365 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2366 { 2367 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2368 PetscInt n = A->rmap->n; 2369 PetscInt cstart = A->cmap->rstart; 2370 PetscInt *cmap = mat->garray; 2371 PetscInt *diagIdx, *offdiagIdx; 2372 Vec diagV, offdiagV; 2373 PetscScalar *a, *diagA, *offdiagA; 2374 PetscInt r; 2375 PetscErrorCode ierr; 2376 2377 PetscFunctionBegin; 2378 
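  /* Row minima are computed separately for the diagonal block (mat->A) and the off-diagonal block (mat->B) and then
     merged row by row below; the index of the winning entry is converted to a global column number
     (cstart offset for the diagonal block, garray[] lookup for the off-diagonal block) */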
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2379 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2380 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2381 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2382 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2383 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2384 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2385 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2386 for (r = 0; r < n; ++r) { 2387 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2388 a[r] = diagA[r]; 2389 idx[r] = cstart + diagIdx[r]; 2390 } else { 2391 a[r] = offdiagA[r]; 2392 idx[r] = cmap[offdiagIdx[r]]; 2393 } 2394 } 2395 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2396 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2397 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2398 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2399 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2400 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2401 PetscFunctionReturn(0); 2402 } 2403 2404 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2405 { 2406 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2407 PetscInt n = A->rmap->n; 2408 PetscInt cstart = A->cmap->rstart; 2409 PetscInt *cmap = mat->garray; 2410 PetscInt *diagIdx, *offdiagIdx; 2411 Vec diagV, offdiagV; 2412 PetscScalar *a, *diagA, *offdiagA; 2413 PetscInt r; 2414 PetscErrorCode ierr; 2415 2416 PetscFunctionBegin; 2417 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2418 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2419 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2420 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2421 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2422 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2423 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2424 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2425 for (r = 0; r < n; ++r) { 2426 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2427 a[r] = diagA[r]; 2428 idx[r] = cstart + diagIdx[r]; 2429 } else { 2430 a[r] = offdiagA[r]; 2431 idx[r] = cmap[offdiagIdx[r]]; 2432 } 2433 } 2434 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2435 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2436 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2437 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2438 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2439 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2444 { 2445 PetscErrorCode ierr; 2446 Mat *dummy; 2447 2448 PetscFunctionBegin; 2449 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2450 *newmat = *dummy; 2451 ierr = PetscFree(dummy);CHKERRQ(ierr); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2456 { 2457 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2458 PetscErrorCode ierr; 2459 2460 PetscFunctionBegin; 2461 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2462 A->factorerrortype = a->A->factorerrortype; 2463 PetscFunctionReturn(0); 2464 } 2465 2466 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2467 { 2468 PetscErrorCode ierr; 2469 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2470 2471 PetscFunctionBegin; 2472
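  /* Fill the diagonal (aij->A) and off-diagonal (aij->B) blocks with random values independently, then run the standard assembly so the parallel matrix is left in an assembled state */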
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2473 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2474 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2475 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2476 PetscFunctionReturn(0); 2477 } 2478 2479 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2480 { 2481 PetscFunctionBegin; 2482 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2483 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2484 PetscFunctionReturn(0); 2485 } 2486 2487 /*@ 2488 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2489 2490 Collective on Mat 2491 2492 Input Parameters: 2493 + A - the matrix 2494 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2495 2496 Level: advanced 2497 2498 @*/ 2499 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2500 { 2501 PetscErrorCode ierr; 2502 2503 PetscFunctionBegin; 2504 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2505 PetscFunctionReturn(0); 2506 } 2507 2508 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2509 { 2510 PetscErrorCode ierr; 2511 PetscBool sc = PETSC_FALSE,flg; 2512 2513 PetscFunctionBegin; 2514 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2515 ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr); 2516 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2517 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2518 if (flg) { 2519 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2520 } 2521 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2522 PetscFunctionReturn(0); 2523 } 2524 2525 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2526 { 2527 PetscErrorCode ierr; 2528 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2529 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2530 2531 PetscFunctionBegin; 2532 if (!Y->preallocated) { 2533 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2534 } else if (!aij->nz) { 2535 PetscInt nonew = aij->nonew; 2536 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2537 aij->nonew = nonew; 2538 } 2539 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2540 PetscFunctionReturn(0); 2541 } 2542 2543 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2544 { 2545 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2546 PetscErrorCode ierr; 2547 2548 PetscFunctionBegin; 2549 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2550 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2551 if (d) { 2552 PetscInt rstart; 2553 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2554 *d += rstart; 2555 2556 } 2557 PetscFunctionReturn(0); 2558 } 2559 2560 2561 /* -------------------------------------------------------------------*/ 2562 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2563 MatGetRow_MPIAIJ, 2564 MatRestoreRow_MPIAIJ, 2565 MatMult_MPIAIJ, 2566 /* 4*/ MatMultAdd_MPIAIJ, 2567 MatMultTranspose_MPIAIJ, 2568 MatMultTransposeAdd_MPIAIJ, 2569 0, 2570 0, 2571 0, 2572 /*10*/ 0, 2573 0, 2574 0, 2575 MatSOR_MPIAIJ, 2576 MatTranspose_MPIAIJ, 2577 /*15*/ MatGetInfo_MPIAIJ, 2578 MatEqual_MPIAIJ, 2579 
MatGetDiagonal_MPIAIJ, 2580 MatDiagonalScale_MPIAIJ, 2581 MatNorm_MPIAIJ, 2582 /*20*/ MatAssemblyBegin_MPIAIJ, 2583 MatAssemblyEnd_MPIAIJ, 2584 MatSetOption_MPIAIJ, 2585 MatZeroEntries_MPIAIJ, 2586 /*24*/ MatZeroRows_MPIAIJ, 2587 0, 2588 0, 2589 0, 2590 0, 2591 /*29*/ MatSetUp_MPIAIJ, 2592 0, 2593 0, 2594 MatGetDiagonalBlock_MPIAIJ, 2595 0, 2596 /*34*/ MatDuplicate_MPIAIJ, 2597 0, 2598 0, 2599 0, 2600 0, 2601 /*39*/ MatAXPY_MPIAIJ, 2602 MatCreateSubMatrices_MPIAIJ, 2603 MatIncreaseOverlap_MPIAIJ, 2604 MatGetValues_MPIAIJ, 2605 MatCopy_MPIAIJ, 2606 /*44*/ MatGetRowMax_MPIAIJ, 2607 MatScale_MPIAIJ, 2608 MatShift_MPIAIJ, 2609 MatDiagonalSet_MPIAIJ, 2610 MatZeroRowsColumns_MPIAIJ, 2611 /*49*/ MatSetRandom_MPIAIJ, 2612 0, 2613 0, 2614 0, 2615 0, 2616 /*54*/ MatFDColoringCreate_MPIXAIJ, 2617 0, 2618 MatSetUnfactored_MPIAIJ, 2619 MatPermute_MPIAIJ, 2620 0, 2621 /*59*/ MatCreateSubMatrix_MPIAIJ, 2622 MatDestroy_MPIAIJ, 2623 MatView_MPIAIJ, 2624 0, 2625 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2626 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2627 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2628 0, 2629 0, 2630 0, 2631 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2632 MatGetRowMinAbs_MPIAIJ, 2633 0, 2634 0, 2635 0, 2636 0, 2637 /*75*/ MatFDColoringApply_AIJ, 2638 MatSetFromOptions_MPIAIJ, 2639 0, 2640 0, 2641 MatFindZeroDiagonals_MPIAIJ, 2642 /*80*/ 0, 2643 0, 2644 0, 2645 /*83*/ MatLoad_MPIAIJ, 2646 MatIsSymmetric_MPIAIJ, 2647 0, 2648 0, 2649 0, 2650 0, 2651 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2652 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2653 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2654 MatPtAP_MPIAIJ_MPIAIJ, 2655 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2656 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2657 0, 2658 0, 2659 0, 2660 0, 2661 /*99*/ 0, 2662 0, 2663 0, 2664 MatConjugate_MPIAIJ, 2665 0, 2666 /*104*/MatSetValuesRow_MPIAIJ, 2667 MatRealPart_MPIAIJ, 2668 MatImaginaryPart_MPIAIJ, 2669 0, 2670 0, 2671 /*109*/0, 2672 0, 2673 MatGetRowMin_MPIAIJ, 2674 0, 2675 MatMissingDiagonal_MPIAIJ, 2676 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2677 0, 2678 MatGetGhosts_MPIAIJ, 2679 0, 2680 0, 2681 /*119*/0, 2682 0, 2683 0, 2684 0, 2685 MatGetMultiProcBlock_MPIAIJ, 2686 /*124*/MatFindNonzeroRows_MPIAIJ, 2687 MatGetColumnNorms_MPIAIJ, 2688 MatInvertBlockDiagonal_MPIAIJ, 2689 0, 2690 MatCreateSubMatricesMPI_MPIAIJ, 2691 /*129*/0, 2692 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2693 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2694 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2695 0, 2696 /*134*/0, 2697 0, 2698 MatRARt_MPIAIJ_MPIAIJ, 2699 0, 2700 0, 2701 /*139*/MatSetBlockSizes_MPIAIJ, 2702 0, 2703 0, 2704 MatFDColoringSetUp_MPIXAIJ, 2705 MatFindOffBlockDiagonalEntries_MPIAIJ, 2706 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2707 }; 2708 2709 /* ----------------------------------------------------------------------------------------*/ 2710 2711 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2712 { 2713 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2714 PetscErrorCode ierr; 2715 2716 PetscFunctionBegin; 2717 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2718 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2719 PetscFunctionReturn(0); 2720 } 2721 2722 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2723 { 2724 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2725 PetscErrorCode ierr; 2726 2727 PetscFunctionBegin; 2728 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2729 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2730 PetscFunctionReturn(0); 2731 } 2732 2733 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt 
o_nnz[]) 2734 { 2735 Mat_MPIAIJ *b; 2736 PetscErrorCode ierr; 2737 2738 PetscFunctionBegin; 2739 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2740 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2741 b = (Mat_MPIAIJ*)B->data; 2742 2743 #if defined(PETSC_USE_CTABLE) 2744 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2745 #else 2746 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2747 #endif 2748 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2749 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2750 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2751 2752 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2753 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2754 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2755 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2756 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2757 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2758 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2759 2760 if (!B->preallocated) { 2761 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2762 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2763 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2764 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2765 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2766 } 2767 2768 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2769 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2770 B->preallocated = PETSC_TRUE; 2771 B->was_assembled = PETSC_FALSE; 2772 B->assembled = PETSC_FALSE;; 2773 PetscFunctionReturn(0); 2774 } 2775 2776 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2777 { 2778 Mat_MPIAIJ *b; 2779 PetscErrorCode ierr; 2780 2781 PetscFunctionBegin; 2782 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2783 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2784 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2785 b = (Mat_MPIAIJ*)B->data; 2786 2787 #if defined(PETSC_USE_CTABLE) 2788 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2789 #else 2790 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2791 #endif 2792 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2793 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2794 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2795 2796 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2797 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2798 B->preallocated = PETSC_TRUE; 2799 B->was_assembled = PETSC_FALSE; 2800 B->assembled = PETSC_FALSE; 2801 PetscFunctionReturn(0); 2802 } 2803 2804 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2805 { 2806 Mat mat; 2807 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2808 PetscErrorCode ierr; 2809 2810 PetscFunctionBegin; 2811 *newmat = 0; 2812 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2813 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2814 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2815 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2816 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2817 a = (Mat_MPIAIJ*)mat->data; 2818 2819 mat->factortype = matin->factortype; 2820 mat->assembled = PETSC_TRUE; 2821 mat->insertmode = NOT_SET_VALUES; 2822 mat->preallocated = PETSC_TRUE; 2823 2824 a->size = oldmat->size; 2825 a->rank = oldmat->rank; 2826 a->donotstash 
= oldmat->donotstash; 2827 a->roworiented = oldmat->roworiented; 2828 a->rowindices = 0; 2829 a->rowvalues = 0; 2830 a->getrowactive = PETSC_FALSE; 2831 2832 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2833 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2834 2835 if (oldmat->colmap) { 2836 #if defined(PETSC_USE_CTABLE) 2837 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2838 #else 2839 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2840 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2841 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2842 #endif 2843 } else a->colmap = 0; 2844 if (oldmat->garray) { 2845 PetscInt len; 2846 len = oldmat->B->cmap->n; 2847 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2848 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2849 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2850 } else a->garray = 0; 2851 2852 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2853 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2854 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2855 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2856 2857 if (oldmat->Mvctx_mpi1) { 2858 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2859 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2860 } 2861 2862 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2863 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2864 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2865 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2866 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2867 *newmat = mat; 2868 PetscFunctionReturn(0); 2869 } 2870 2871 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2872 { 2873 PetscScalar *vals,*svals; 2874 MPI_Comm comm; 2875 PetscErrorCode ierr; 2876 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2877 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2878 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2879 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2880 PetscInt cend,cstart,n,*rowners; 2881 int fd; 2882 PetscInt bs = newMat->rmap->bs; 2883 2884 PetscFunctionBegin; 2885 /* force binary viewer to load .info file if it has not yet done so */ 2886 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2887 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2888 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2889 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2890 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2891 if (!rank) { 2892 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2893 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2894 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2895 } 2896 2897 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2898 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the 
matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2899 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2900 if (bs < 0) bs = 1; 2901 2902 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2903 M = header[1]; N = header[2]; 2904 2905 /* If global sizes are set, check if they are consistent with that given in the file */ 2906 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2907 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2908 2909 /* determine ownership of all (block) rows */ 2910 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2911 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2912 else m = newMat->rmap->n; /* Set by user */ 2913 2914 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2915 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2916 2917 /* First process needs enough room for process with most rows */ 2918 if (!rank) { 2919 mmax = rowners[1]; 2920 for (i=2; i<=size; i++) { 2921 mmax = PetscMax(mmax, rowners[i]); 2922 } 2923 } else mmax = -1; /* unused, but compilers complain */ 2924 2925 rowners[0] = 0; 2926 for (i=2; i<=size; i++) { 2927 rowners[i] += rowners[i-1]; 2928 } 2929 rstart = rowners[rank]; 2930 rend = rowners[rank+1]; 2931 2932 /* distribute row lengths to all processors */ 2933 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2934 if (!rank) { 2935 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2936 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2937 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2938 for (j=0; j<m; j++) { 2939 procsnz[0] += ourlens[j]; 2940 } 2941 for (i=1; i<size; i++) { 2942 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2943 /* calculate the number of nonzeros on each processor */ 2944 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2945 procsnz[i] += rowlengths[j]; 2946 } 2947 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2948 } 2949 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2950 } else { 2951 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2952 } 2953 2954 if (!rank) { 2955 /* determine max buffer needed and allocate it */ 2956 maxnz = 0; 2957 for (i=0; i<size; i++) { 2958 maxnz = PetscMax(maxnz,procsnz[i]); 2959 } 2960 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2961 2962 /* read in my part of the matrix column indices */ 2963 nz = procsnz[0]; 2964 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2965 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2966 2967 /* read in every one elses and ship off */ 2968 for (i=1; i<size; i++) { 2969 nz = procsnz[i]; 2970 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2971 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2972 } 2973 ierr = PetscFree(cols);CHKERRQ(ierr); 2974 } else { 2975 /* determine buffer space needed for message */ 2976 nz = 0; 2977 for (i=0; i<m; i++) { 2978 nz += ourlens[i]; 2979 } 2980 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2981 2982 /* receive message of column indices*/ 2983 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2984 } 2985 2986 /* 
determine column ownership if matrix is not square */ 2987 if (N != M) { 2988 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2989 else n = newMat->cmap->n; 2990 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2991 cstart = cend - n; 2992 } else { 2993 cstart = rstart; 2994 cend = rend; 2995 n = cend - cstart; 2996 } 2997 2998 /* loop over local rows, determining number of off diagonal entries */ 2999 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3000 jj = 0; 3001 for (i=0; i<m; i++) { 3002 for (j=0; j<ourlens[i]; j++) { 3003 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3004 jj++; 3005 } 3006 } 3007 3008 for (i=0; i<m; i++) { 3009 ourlens[i] -= offlens[i]; 3010 } 3011 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3012 3013 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3014 3015 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3016 3017 for (i=0; i<m; i++) { 3018 ourlens[i] += offlens[i]; 3019 } 3020 3021 if (!rank) { 3022 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3023 3024 /* read in my part of the matrix numerical values */ 3025 nz = procsnz[0]; 3026 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3027 3028 /* insert into matrix */ 3029 jj = rstart; 3030 smycols = mycols; 3031 svals = vals; 3032 for (i=0; i<m; i++) { 3033 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3034 smycols += ourlens[i]; 3035 svals += ourlens[i]; 3036 jj++; 3037 } 3038 3039 /* read in other processors and ship out */ 3040 for (i=1; i<size; i++) { 3041 nz = procsnz[i]; 3042 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3043 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3044 } 3045 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3046 } else { 3047 /* receive numeric values */ 3048 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3049 3050 /* receive message of values*/ 3051 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3052 3053 /* insert into matrix */ 3054 jj = rstart; 3055 smycols = mycols; 3056 svals = vals; 3057 for (i=0; i<m; i++) { 3058 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3059 smycols += ourlens[i]; 3060 svals += ourlens[i]; 3061 jj++; 3062 } 3063 } 3064 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3065 ierr = PetscFree(vals);CHKERRQ(ierr); 3066 ierr = PetscFree(mycols);CHKERRQ(ierr); 3067 ierr = PetscFree(rowners);CHKERRQ(ierr); 3068 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3069 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3070 PetscFunctionReturn(0); 3071 } 3072 3073 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3074 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3075 { 3076 PetscErrorCode ierr; 3077 IS iscol_local; 3078 PetscBool isstride; 3079 PetscMPIInt lisstride=0,gisstride; 3080 3081 PetscFunctionBegin; 3082 /* check if we are grabbing all columns*/ 3083 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3084 3085 if (isstride) { 3086 PetscInt start,len,mstart,mlen; 3087 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3088 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3089 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3090 if (mstart == start && mlen-mstart == len) lisstride = 1; 3091 } 3092 3093 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3094 if (gisstride) { 3095 PetscInt N; 3096 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3097 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3098 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3099 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3100 } else { 3101 PetscInt cbs; 3102 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3103 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3104 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3105 } 3106 3107 *isseq = iscol_local; 3108 PetscFunctionReturn(0); 3109 } 3110 3111 /* 3112 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3113 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3114 3115 Input Parameters: 3116 mat - matrix 3117 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3118 i.e., mat->rstart <= isrow[i] < mat->rend 3119 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3120 i.e., mat->cstart <= iscol[i] < mat->cend 3121 Output Parameter: 3122 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3123 iscol_o - sequential column index set for retrieving mat->B 3124 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3125 */ 3126 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3127 { 3128 PetscErrorCode ierr; 3129 Vec x,cmap; 3130 const PetscInt *is_idx; 3131 PetscScalar *xarray,*cmaparray; 3132 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3133 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3134 Mat B=a->B; 3135 Vec lvec=a->lvec,lcmap; 3136 PetscInt i,cstart,cend,Bn=B->cmap->N; 3137 MPI_Comm comm; 3138 VecScatter Mvctx=a->Mvctx; 3139 3140 PetscFunctionBegin; 3141 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3142 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3143 3144 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3145 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3146 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3147 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3148 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3149 3150 /* Get start indices */ 3151 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3152 isstart -= ncols; 3153 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3154 3155 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3156 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3157 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3158 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3159 for (i=0; i<ncols; i++) { 3160 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3161 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3162 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3163 } 3164 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3165 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3166 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3167 3168 /* Get iscol_d */ 3169 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3170 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3171 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3172 3173 /* Get isrow_d */ 3174 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3175 rstart = mat->rmap->rstart; 3176 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3177 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3178 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3179 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3180 3181 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3182 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3183 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3184 3185 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3186 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3187 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3188 3189 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3190 3191 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3192 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3193 3194 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3195 /* off-process column indices */ 3196 count = 0; 3197 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3198 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3199 3200 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3201 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3202 for (i=0; i<Bn; i++) { 3203 if (PetscRealPart(xarray[i]) > -1.0) { 3204 idx[count] = i; /* local column index in off-diagonal part B */ 3205 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3206 count++; 3207 } 3208 } 3209 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3210 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3211 3212 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3213 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3214 3215 ierr = PetscFree(idx);CHKERRQ(ierr); 3216 *garray = cmap1; 3217 3218 ierr = VecDestroy(&x);CHKERRQ(ierr); 3219 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3220 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3221 PetscFunctionReturn(0); 3222 } 3223 3224 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3225 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3226 { 3227 PetscErrorCode ierr; 3228 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3229 Mat M = NULL; 3230 MPI_Comm comm; 3231 IS iscol_d,isrow_d,iscol_o; 3232 Mat Asub = NULL,Bsub = NULL; 3233 PetscInt n; 3234 3235 PetscFunctionBegin; 3236 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3237 3238 if (call == MAT_REUSE_MATRIX) { 3239 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3240 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3241 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3242 3243 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3244 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3245 3246 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3247 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3248 3249 /* Update diagonal and off-diagonal portions of submat */ 3250 asub = (Mat_MPIAIJ*)(*submat)->data; 3251 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3252 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3253 if (n) { 3254 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3255 } 3256 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3257 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3258 3259 } else { /* call == MAT_INITIAL_MATRIX) */ 3260 const PetscInt *garray; 3261 PetscInt BsubN; 3262 3263 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3264 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3265 3266 /* Create local submatrices Asub and Bsub */ 3267 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3268 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3269 3270 /* Create submatrix M */ 3271 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3272 3273 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3274 asub = (Mat_MPIAIJ*)M->data; 3275 3276 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3277 n = asub->B->cmap->N; 3278 if (BsubN > n) { 3279 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3280 const PetscInt *idx; 3281 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3282 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3283 3284 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3285 j = 0; 3286 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3287 for (i=0; i<n; i++) { 3288 if (j >= BsubN) break; 3289 while (subgarray[i] > garray[j]) j++; 3290 3291 if (subgarray[i] == garray[j]) { 3292 idx_new[i] = idx[j++]; 3293 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3294 } 3295 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3296 3297 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3298 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3299 3300 } else if (BsubN < n) { 3301 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3302 } 3303 3304 ierr = PetscFree(garray);CHKERRQ(ierr); 3305 *submat = M; 3306 3307 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3308 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3309 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3310 3311 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3312 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3313 3314 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3315 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3316 } 3317 PetscFunctionReturn(0); 3318 } 3319 3320 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3321 { 3322 PetscErrorCode ierr; 3323 IS iscol_local=NULL,isrow_d; 3324 PetscInt csize; 3325 PetscInt n,i,j,start,end; 3326 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3327 MPI_Comm comm; 3328 3329 PetscFunctionBegin; 3330 /* If isrow has same processor distribution as mat, 3331 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3332 if (call == MAT_REUSE_MATRIX) { 3333 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3334 if (isrow_d) { 3335 sameRowDist = PETSC_TRUE; 3336 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3337 } else { 3338 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3339 if (iscol_local) { 3340 sameRowDist = PETSC_TRUE; 3341 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3342 } 3343 } 3344 } else { 3345 /* Check if isrow has same processor distribution as mat */ 3346 sameDist[0] 
= PETSC_FALSE; 3347 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3348 if (!n) { 3349 sameDist[0] = PETSC_TRUE; 3350 } else { 3351 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3352 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3353 if (i >= start && j < end) { 3354 sameDist[0] = PETSC_TRUE; 3355 } 3356 } 3357 3358 /* Check if iscol has same processor distribution as mat */ 3359 sameDist[1] = PETSC_FALSE; 3360 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3361 if (!n) { 3362 sameDist[1] = PETSC_TRUE; 3363 } else { 3364 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3365 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3366 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3367 } 3368 3369 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3370 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3371 sameRowDist = tsameDist[0]; 3372 } 3373 3374 if (sameRowDist) { 3375 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3376 /* isrow and iscol have same processor distribution as mat */ 3377 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3378 PetscFunctionReturn(0); 3379 } else { /* sameRowDist */ 3380 /* isrow has same processor distribution as mat */ 3381 if (call == MAT_INITIAL_MATRIX) { 3382 PetscBool sorted; 3383 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3384 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3385 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3386 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3387 3388 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3389 if (sorted) { 3390 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3391 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3392 PetscFunctionReturn(0); 3393 } 3394 } else { /* call == MAT_REUSE_MATRIX */ 3395 IS iscol_sub; 3396 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3397 if (iscol_sub) { 3398 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3399 PetscFunctionReturn(0); 3400 } 3401 } 3402 } 3403 } 3404 3405 /* General case: iscol -> iscol_local which has global size of iscol */ 3406 if (call == MAT_REUSE_MATRIX) { 3407 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3408 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3409 } else { 3410 if (!iscol_local) { 3411 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3412 } 3413 } 3414 3415 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3416 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3417 3418 if (call == MAT_INITIAL_MATRIX) { 3419 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3420 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3421 } 3422 PetscFunctionReturn(0); 3423 } 3424 3425 /*@C 3426 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3427 and "off-diagonal" part of the matrix in CSR format. 3428 3429 Collective on MPI_Comm 3430 3431 Input Parameters: 3432 + comm - MPI communicator 3433 . 
A - "diagonal" portion of matrix 3434 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3435 - garray - global index of B columns 3436 3437 Output Parameter: 3438 . mat - the matrix, with input A as its local diagonal matrix 3439 Level: advanced 3440 3441 Notes: 3442 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3443 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3444 3445 .seealso: MatCreateMPIAIJWithSplitArrays() 3446 @*/ 3447 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3448 { 3449 PetscErrorCode ierr; 3450 Mat_MPIAIJ *maij; 3451 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3452 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3453 PetscScalar *oa=b->a; 3454 Mat Bnew; 3455 PetscInt m,n,N; 3456 3457 PetscFunctionBegin; 3458 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3459 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3460 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3461 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3462 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3463 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3464 3465 /* Get global columns of mat */ 3466 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3467 3468 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3469 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3470 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3471 maij = (Mat_MPIAIJ*)(*mat)->data; 3472 3473 (*mat)->preallocated = PETSC_TRUE; 3474 3475 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3476 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3477 3478 /* Set A as diagonal portion of *mat */ 3479 maij->A = A; 3480 3481 nz = oi[m]; 3482 for (i=0; i<nz; i++) { 3483 col = oj[i]; 3484 oj[i] = garray[col]; 3485 } 3486 3487 /* Set Bnew as off-diagonal portion of *mat */ 3488 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3489 bnew = (Mat_SeqAIJ*)Bnew->data; 3490 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3491 maij->B = Bnew; 3492 3493 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3494 3495 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3496 b->free_a = PETSC_FALSE; 3497 b->free_ij = PETSC_FALSE; 3498 ierr = MatDestroy(&B);CHKERRQ(ierr); 3499 3500 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3501 bnew->free_a = PETSC_TRUE; 3502 bnew->free_ij = PETSC_TRUE; 3503 3504 /* condense columns of maij->B */ 3505 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3506 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3507 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3508 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3509 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3510 PetscFunctionReturn(0); 3511 } 3512 3513 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3514 
3515 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3516 { 3517 PetscErrorCode ierr; 3518 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3519 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3520 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3521 Mat M,Msub,B=a->B; 3522 MatScalar *aa; 3523 Mat_SeqAIJ *aij; 3524 PetscInt *garray = a->garray,*colsub,Ncols; 3525 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3526 IS iscol_sub,iscmap; 3527 const PetscInt *is_idx,*cmap; 3528 PetscBool allcolumns=PETSC_FALSE; 3529 MPI_Comm comm; 3530 3531 PetscFunctionBegin; 3532 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3533 3534 if (call == MAT_REUSE_MATRIX) { 3535 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3536 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3537 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3538 3539 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3540 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3541 3542 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3543 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3544 3545 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3546 3547 } else { /* call == MAT_INITIAL_MATRIX) */ 3548 PetscBool flg; 3549 3550 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3551 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3552 3553 /* (1) iscol -> nonscalable iscol_local */ 3554 /* Check for special case: each processor gets entire matrix columns */ 3555 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3556 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3557 if (allcolumns) { 3558 iscol_sub = iscol_local; 3559 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3560 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3561 3562 } else { 3563 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3564 PetscInt *idx,*cmap1,k; 3565 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3566 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3567 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3568 count = 0; 3569 k = 0; 3570 for (i=0; i<Ncols; i++) { 3571 j = is_idx[i]; 3572 if (j >= cstart && j < cend) { 3573 /* diagonal part of mat */ 3574 idx[count] = j; 3575 cmap1[count++] = i; /* column index in submat */ 3576 } else if (Bn) { 3577 /* off-diagonal part of mat */ 3578 if (j == garray[k]) { 3579 idx[count] = j; 3580 cmap1[count++] = i; /* column index in submat */ 3581 } else if (j > garray[k]) { 3582 while (j > garray[k] && k < Bn-1) k++; 3583 if (j == garray[k]) { 3584 idx[count] = j; 3585 cmap1[count++] = i; /* column index in submat */ 3586 } 3587 } 3588 } 3589 } 3590 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3591 3592 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3593 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3594 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3595 3596 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3597 } 3598 3599 /* (3) Create sequential Msub */ 3600 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3601 } 3602 3603 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3604 aij = (Mat_SeqAIJ*)(Msub)->data; 3605 ii = aij->i; 3606 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3607 3608 /* 3609 m - number of local rows 3610 Ncols - number of columns (same on all processors) 3611 rstart - first row in new global matrix generated 3612 */ 3613 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3614 3615 if (call == MAT_INITIAL_MATRIX) { 3616 /* (4) Create parallel newmat */ 3617 PetscMPIInt rank,size; 3618 PetscInt csize; 3619 3620 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3621 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3622 3623 /* 3624 Determine the number of non-zeros in the diagonal and off-diagonal 3625 portions of the matrix in order to do correct preallocation 3626 */ 3627 3628 /* first get start and end of "diagonal" columns */ 3629 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3630 if (csize == PETSC_DECIDE) { 3631 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3632 if (mglobal == Ncols) { /* square matrix */ 3633 nlocal = m; 3634 } else { 3635 nlocal = Ncols/size + ((Ncols % size) > rank); 3636 } 3637 } else { 3638 nlocal = csize; 3639 } 3640 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3641 rstart = rend - nlocal; 3642 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3643 3644 /* next, compute all the lengths */ 3645 jj = aij->j; 3646 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3647 olens = dlens + m; 3648 for (i=0; i<m; i++) { 3649 jend = ii[i+1] - ii[i]; 3650 olen = 0; 3651 dlen = 0; 3652 for (j=0; j<jend; j++) { 3653 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3654 else dlen++; 3655 jj++; 3656 } 3657 olens[i] = olen; 3658 dlens[i] = dlen; 3659 } 3660 3661 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3662 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3663 3664 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3665 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3666 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3667 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3668 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3669 ierr = PetscFree(dlens);CHKERRQ(ierr); 3670 3671 } else { /* call == MAT_REUSE_MATRIX */ 3672 M = *newmat; 3673 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3674 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3675 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3676 /* 3677 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3678 rather than the slower MatSetValues(). 3679 */ 3680 M->was_assembled = PETSC_TRUE; 3681 M->assembled = PETSC_FALSE; 3682 } 3683 3684 /* (5) Set values of Msub to *newmat */ 3685 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3686 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3687 3688 jj = aij->j; 3689 aa = aij->a; 3690 for (i=0; i<m; i++) { 3691 row = rstart + i; 3692 nz = ii[i+1] - ii[i]; 3693 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3694 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3695 jj += nz; aa += nz; 3696 } 3697 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3698 3699 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3700 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3701 3702 ierr = PetscFree(colsub);CHKERRQ(ierr); 3703 3704 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3705 if (call == MAT_INITIAL_MATRIX) { 3706 *newmat = M; 3707 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3708 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3709 3710 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3711 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3712 3713 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3714 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3715 3716 if (iscol_local) { 3717 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3718 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3719 } 3720 } 3721 PetscFunctionReturn(0); 3722 } 3723 3724 /* 3725 Not great since it makes two copies of the submatrix, first an SeqAIJ 3726 in local and then by concatenating the local matrices the end result. 3727 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3728 3729 Note: This requires a sequential iscol with all indices. 
3730 */ 3731 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3732 { 3733 PetscErrorCode ierr; 3734 PetscMPIInt rank,size; 3735 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3736 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3737 Mat M,Mreuse; 3738 MatScalar *aa,*vwork; 3739 MPI_Comm comm; 3740 Mat_SeqAIJ *aij; 3741 PetscBool colflag,allcolumns=PETSC_FALSE; 3742 3743 PetscFunctionBegin; 3744 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3745 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3746 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3747 3748 /* Check for special case: each processor gets entire matrix columns */ 3749 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3750 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3751 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3752 3753 if (call == MAT_REUSE_MATRIX) { 3754 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3755 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3756 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3757 } else { 3758 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3759 } 3760 3761 /* 3762 m - number of local rows 3763 n - number of columns (same on all processors) 3764 rstart - first row in new global matrix generated 3765 */ 3766 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3767 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3768 if (call == MAT_INITIAL_MATRIX) { 3769 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3770 ii = aij->i; 3771 jj = aij->j; 3772 3773 /* 3774 Determine the number of non-zeros in the diagonal and off-diagonal 3775 portions of the matrix in order to do correct preallocation 3776 */ 3777 3778 /* first get start and end of "diagonal" columns */ 3779 if (csize == PETSC_DECIDE) { 3780 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3781 if (mglobal == n) { /* square matrix */ 3782 nlocal = m; 3783 } else { 3784 nlocal = n/size + ((n % size) > rank); 3785 } 3786 } else { 3787 nlocal = csize; 3788 } 3789 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3790 rstart = rend - nlocal; 3791 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3792 3793 /* next, compute all the lengths */ 3794 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3795 olens = dlens + m; 3796 for (i=0; i<m; i++) { 3797 jend = ii[i+1] - ii[i]; 3798 olen = 0; 3799 dlen = 0; 3800 for (j=0; j<jend; j++) { 3801 if (*jj < rstart || *jj >= rend) olen++; 3802 else dlen++; 3803 jj++; 3804 } 3805 olens[i] = olen; 3806 dlens[i] = dlen; 3807 } 3808 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3809 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3810 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3811 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3812 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3813 ierr = PetscFree(dlens);CHKERRQ(ierr); 3814 } else { 3815 PetscInt ml,nl; 3816 3817 M = *newmat; 3818 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3819 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3820 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3821 /* 3822 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3823 rather than the slower MatSetValues(). 3824 */ 3825 M->was_assembled = PETSC_TRUE; 3826 M->assembled = PETSC_FALSE; 3827 } 3828 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3829 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3830 ii = aij->i; 3831 jj = aij->j; 3832 aa = aij->a; 3833 for (i=0; i<m; i++) { 3834 row = rstart + i; 3835 nz = ii[i+1] - ii[i]; 3836 cwork = jj; jj += nz; 3837 vwork = aa; aa += nz; 3838 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3839 } 3840 3841 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3842 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3843 *newmat = M; 3844 3845 /* save submatrix used in processor for next request */ 3846 if (call == MAT_INITIAL_MATRIX) { 3847 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3848 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3849 } 3850 PetscFunctionReturn(0); 3851 } 3852 3853 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3854 { 3855 PetscInt m,cstart, cend,j,nnz,i,d; 3856 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3857 const PetscInt *JJ; 3858 PetscScalar *values; 3859 PetscErrorCode ierr; 3860 PetscBool nooffprocentries; 3861 3862 PetscFunctionBegin; 3863 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3864 3865 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3866 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3867 m = B->rmap->n; 3868 cstart = B->cmap->rstart; 3869 cend = B->cmap->rend; 3870 rstart = B->rmap->rstart; 3871 3872 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3873 3874 #if defined(PETSC_USE_DEBUG) 3875 for (i=0; i<m; i++) { 3876 nnz = Ii[i+1]- Ii[i]; 3877 JJ = J + Ii[i]; 3878 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3879 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3880 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3881 } 3882 #endif 3883 3884 for (i=0; i<m; i++) { 3885 nnz = Ii[i+1]- Ii[i]; 3886 JJ = J + Ii[i]; 3887 nnz_max = PetscMax(nnz_max,nnz); 3888 d = 0; 3889 for (j=0; j<nnz; j++) { 3890 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3891 } 3892 d_nnz[i] = d; 3893 o_nnz[i] = nnz - d; 3894 } 3895 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3896 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3897 3898 if (v) values = (PetscScalar*)v; 3899 else { 3900 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3901 } 3902 3903 for (i=0; i<m; i++) { 3904 ii = i + rstart; 3905 nnz = Ii[i+1]- Ii[i]; 3906 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3907 } 3908 nooffprocentries = B->nooffprocentries; 3909 B->nooffprocentries = PETSC_TRUE; 3910 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3911 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3912 B->nooffprocentries = nooffprocentries; 3913 3914 if (!v) { 3915 ierr = PetscFree(values);CHKERRQ(ierr); 3916 } 3917 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3918 PetscFunctionReturn(0); 3919 } 3920 3921 /*@ 3922 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3923 (the default parallel PETSc format). 3924 3925 Collective on MPI_Comm 3926 3927 Input Parameters: 3928 + B - the matrix 3929 . i - the indices into j for the start of each local row (starts with zero) 3930 . j - the column indices for each local row (starts with zero) 3931 - v - optional values in the matrix 3932 3933 Level: developer 3934 3935 Notes: 3936 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3937 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3938 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3939 3940 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3941 3942 The format which is used for the sparse matrix input, is equivalent to a 3943 row-major ordering.. i.e for the following matrix, the input data expected is 3944 as shown 3945 3946 $ 1 0 0 3947 $ 2 0 3 P0 3948 $ ------- 3949 $ 4 5 6 P1 3950 $ 3951 $ Process0 [P0]: rows_owned=[0,1] 3952 $ i = {0,1,3} [size = nrow+1 = 2+1] 3953 $ j = {0,0,2} [size = 3] 3954 $ v = {1,2,3} [size = 3] 3955 $ 3956 $ Process1 [P1]: rows_owned=[2] 3957 $ i = {0,3} [size = nrow+1 = 1+1] 3958 $ j = {0,1,2} [size = 3] 3959 $ v = {4,5,6} [size = 3] 3960 3961 .keywords: matrix, aij, compressed row, sparse, parallel 3962 3963 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3964 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3965 @*/ 3966 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3967 { 3968 PetscErrorCode ierr; 3969 3970 PetscFunctionBegin; 3971 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3972 PetscFunctionReturn(0); 3973 } 3974 3975 /*@C 3976 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3977 (the default parallel PETSc format). For good matrix assembly performance 3978 the user should preallocate the matrix storage by setting the parameters 3979 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3980 performance can be increased by more than a factor of 50. 3981 3982 Collective on MPI_Comm 3983 3984 Input Parameters: 3985 + B - the matrix 3986 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3987 (same value is used for all local rows) 3988 . d_nnz - array containing the number of nonzeros in the various rows of the 3989 DIAGONAL portion of the local submatrix (possibly different for each row) 3990 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3991 The size of this array is equal to the number of local rows, i.e 'm'. 
3992 For matrices that will be factored, you must leave room for (and set) 3993 the diagonal entry even if it is zero. 3994 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3995 submatrix (same value is used for all local rows). 3996 - o_nnz - array containing the number of nonzeros in the various rows of the 3997 OFF-DIAGONAL portion of the local submatrix (possibly different for 3998 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3999 structure. The size of this array is equal to the number 4000 of local rows, i.e. 'm'. 4001 4002 If the *_nnz parameter is given then the *_nz parameter is ignored. 4003 4004 The AIJ format (also called the Yale sparse matrix format or 4005 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4006 storage. The stored row and column indices begin with zero. 4007 See Users-Manual: ch_mat for details. 4008 4009 The parallel matrix is partitioned such that the first m0 rows belong to 4010 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4011 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 4012 4013 The DIAGONAL portion of the local submatrix of a processor can be defined 4014 as the submatrix which is obtained by extracting the part corresponding to 4015 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4016 first row that belongs to the processor, r2 is the last row belonging to 4017 this processor, and c1-c2 is the range of indices of the local part of a 4018 vector suitable for applying the matrix to. This is an m x n matrix. In the 4019 common case of a square matrix, the row and column ranges are the same and 4020 the DIAGONAL part is also square. The remaining portion of the local 4021 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 4022 4023 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4024 4025 You can call MatGetInfo() to get information on how effective the preallocation was; 4026 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4027 You can also run with the option -info and look for messages with the string 4028 malloc in them to see if additional memory allocation was needed. 4029 4030 Example usage: 4031 4032 Consider the following 8x8 matrix with 34 nonzero values that is 4033 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4034 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4035 as follows: 4036 4037 .vb 4038 1 2 0 | 0 3 0 | 0 4 4039 Proc0 0 5 6 | 7 0 0 | 8 0 4040 9 0 10 | 11 0 0 | 12 0 4041 ------------------------------------- 4042 13 0 14 | 15 16 17 | 0 0 4043 Proc1 0 18 0 | 19 20 21 | 0 0 4044 0 0 0 | 22 23 0 | 24 0 4045 ------------------------------------- 4046 Proc2 25 26 27 | 0 0 28 | 29 0 4047 30 0 0 | 31 32 33 | 0 34 4048 .ve 4049 4050 This can be represented as a collection of submatrices as: 4051 4052 .vb 4053 A B C 4054 D E F 4055 G H I 4056 .ve 4057 4058 Here the submatrices A,B,C are owned by proc0, D,E,F are 4059 owned by proc1, and G,H,I are owned by proc2. 4060 4061 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4062 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4063 The 'M','N' parameters are 8,8, and have the same values on all procs. 4064 4065 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4066 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4067 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4068 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4069 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4070 matrix, and [DF] as another SeqAIJ matrix. 4071 4072 When d_nz, o_nz parameters are specified, d_nz storage elements are 4073 allocated for every row of the local diagonal submatrix, and o_nz 4074 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4075 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4076 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4077 In this case, the values of d_nz,o_nz are: 4078 .vb 4079 proc0 : d_nz = 2, o_nz = 2 4080 proc1 : d_nz = 3, o_nz = 2 4081 proc2 : d_nz = 1, o_nz = 4 4082 .ve 4083 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4084 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4085 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4086 34 values. 4087 4088 When d_nnz, o_nnz parameters are specified, the storage is specified 4089 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4090 In the above case the values for d_nnz,o_nnz are: 4091 .vb 4092 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4093 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4094 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4095 .ve 4096 Here the space allocated is the sum of all the above values, i.e. 34, and 4097 hence the preallocation is exact. 4098 4099 Level: intermediate 4100 4101 .keywords: matrix, aij, compressed row, sparse, parallel 4102 4103 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4104 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4105 @*/ 4106 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4107 { 4108 PetscErrorCode ierr; 4109 4110 PetscFunctionBegin; 4111 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4112 PetscValidType(B,1); 4113 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4114 PetscFunctionReturn(0); 4115 } 4116 4117 /*@ 4118 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4119 CSR format. 4120 4121 Collective on MPI_Comm 4122 4123 Input Parameters: 4124 + comm - MPI communicator 4125 . m - number of local rows (Cannot be PETSC_DECIDE) 4126 . n - This value should be the same as the local size used in creating the 4127 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4128 calculated if N is given) For square matrices n is almost always m. 4129 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4130 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4131 . i - row indices 4132 . j - column indices 4133 - a - matrix values 4134 4135 Output Parameter: 4136 . mat - the matrix 4137 4138 Level: intermediate 4139 4140 Notes: 4141 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4142 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4143 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4144 4145 The i and j indices are 0-based, and the i indices are indices corresponding to the local j array.
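
   As a minimal sketch (error checking omitted, with A being a Mat declared by the caller), process 0
   in the layout example below would pass its two local rows as

$      PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
$      PetscScalar v[] = {1,2,3};
$      MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);

   while process 1 would pass m = 1 with its own i, j and v arrays.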
4146 4147 The format which is used for the sparse matrix input, is equivalent to a 4148 row-major ordering.. i.e for the following matrix, the input data expected is 4149 as shown 4150 4151 $ 1 0 0 4152 $ 2 0 3 P0 4153 $ ------- 4154 $ 4 5 6 P1 4155 $ 4156 $ Process0 [P0]: rows_owned=[0,1] 4157 $ i = {0,1,3} [size = nrow+1 = 2+1] 4158 $ j = {0,0,2} [size = 3] 4159 $ v = {1,2,3} [size = 3] 4160 $ 4161 $ Process1 [P1]: rows_owned=[2] 4162 $ i = {0,3} [size = nrow+1 = 1+1] 4163 $ j = {0,1,2} [size = 3] 4164 $ v = {4,5,6} [size = 3] 4165 4166 .keywords: matrix, aij, compressed row, sparse, parallel 4167 4168 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4169 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4170 @*/ 4171 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4172 { 4173 PetscErrorCode ierr; 4174 4175 PetscFunctionBegin; 4176 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4177 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4178 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4179 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4180 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4181 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4182 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4183 PetscFunctionReturn(0); 4184 } 4185 4186 /*@C 4187 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4188 (the default parallel PETSc format). For good matrix assembly performance 4189 the user should preallocate the matrix storage by setting the parameters 4190 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4191 performance can be increased by more than a factor of 50. 4192 4193 Collective on MPI_Comm 4194 4195 Input Parameters: 4196 + comm - MPI communicator 4197 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4198 This value should be the same as the local size used in creating the 4199 y vector for the matrix-vector product y = Ax. 4200 . n - This value should be the same as the local size used in creating the 4201 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4202 calculated if N is given) For square matrices n is almost always m. 4203 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4204 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4205 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4206 (same value is used for all local rows) 4207 . d_nnz - array containing the number of nonzeros in the various rows of the 4208 DIAGONAL portion of the local submatrix (possibly different for each row) 4209 or NULL, if d_nz is used to specify the nonzero structure. 4210 The size of this array is equal to the number of local rows, i.e 'm'. 4211 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4212 submatrix (same value is used for all local rows). 4213 - o_nnz - array containing the number of nonzeros in the various rows of the 4214 OFF-DIAGONAL portion of the local submatrix (possibly different for 4215 each row) or NULL, if o_nz is used to specify the nonzero 4216 structure. 
The size of this array is equal to the number 4217 of local rows, i.e. 'm'. 4218 4219 Output Parameter: 4220 . A - the matrix 4221 4222 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4223 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4224 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4225 4226 Notes: 4227 If the *_nnz parameter is given then the *_nz parameter is ignored. 4228 4229 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4230 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4231 storage requirements for this matrix. 4232 4233 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4234 processor then it must be used on all processors that share the object for 4235 that argument. 4236 4237 The user MUST specify either the local or global matrix dimensions 4238 (possibly both). 4239 4240 The parallel matrix is partitioned across processors such that the 4241 first m0 rows belong to process 0, the next m1 rows belong to 4242 process 1, the next m2 rows belong to process 2, etc., where 4243 m0,m1,m2,... are the input parameter 'm', i.e. each processor stores 4244 values corresponding to an [m x N] submatrix. 4245 4246 The columns are logically partitioned with the n0 columns belonging 4247 to the 0th partition, the next n1 columns belonging to the next 4248 partition, etc., where n0,n1,n2,... are the input parameter 'n'. 4249 4250 The DIAGONAL portion of the local submatrix on any given processor 4251 is the submatrix corresponding to the rows and columns m,n 4252 corresponding to the given processor, i.e. the diagonal matrix on 4253 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4254 etc. The remaining portion of the local submatrix [m x (N-n)] 4255 constitutes the OFF-DIAGONAL portion. The example below better 4256 illustrates this concept. 4257 4258 For a square global matrix we define each processor's diagonal portion 4259 to be its local rows and the corresponding columns (a square submatrix); 4260 each processor's off-diagonal portion encompasses the remainder of the 4261 local matrix (a rectangular submatrix). 4262 4263 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4264 4265 When calling this routine with a single process communicator, a matrix of 4266 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4267 type of communicator, use the construction mechanism 4268 .vb 4269 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4270 .ve 4271 4272 $ MatCreate(...,&A); 4273 $ MatSetType(A,MATMPIAIJ); 4274 $ MatSetSizes(A, m,n,M,N); 4275 $ MatMPIAIJSetPreallocation(A,...); 4276 4277 By default, this format uses inodes (identical nodes) when possible. 4278 We search for consecutive rows with the same nonzero structure, thereby 4279 reusing matrix information to achieve increased efficiency. 4280 4281 Options Database Keys: 4282 + -mat_no_inode - Do not use inodes 4283 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4284 4285 4286 4287 Example usage: 4288 4289 Consider the following 8x8 matrix with 34 nonzero values that is 4290 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4291 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4292 as follows: 4293 4294 .vb 4295 1 2 0 | 0 3 0 | 0 4 4296 Proc0 0 5 6 | 7 0 0 | 8 0 4297 9 0 10 | 11 0 0 | 12 0 4298 ------------------------------------- 4299 13 0 14 | 15 16 17 | 0 0 4300 Proc1 0 18 0 | 19 20 21 | 0 0 4301 0 0 0 | 22 23 0 | 24 0 4302 ------------------------------------- 4303 Proc2 25 26 27 | 0 0 28 | 29 0 4304 30 0 0 | 31 32 33 | 0 34 4305 .ve 4306 4307 This can be represented as a collection of submatrices as 4308 4309 .vb 4310 A B C 4311 D E F 4312 G H I 4313 .ve 4314 4315 Here the submatrices A,B,C are owned by proc0, D,E,F are 4316 owned by proc1, and G,H,I are owned by proc2. 4317 4318 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4319 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4320 The 'M','N' parameters are 8,8, and have the same values on all procs. 4321 4322 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4323 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4324 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4325 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4326 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4327 matrix, and [DF] as another SeqAIJ matrix. 4328 4329 When d_nz, o_nz parameters are specified, d_nz storage elements are 4330 allocated for every row of the local diagonal submatrix, and o_nz 4331 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4332 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4333 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4334 In this case, the values of d_nz,o_nz are 4335 .vb 4336 proc0 : d_nz = 2, o_nz = 2 4337 proc1 : d_nz = 3, o_nz = 2 4338 proc2 : d_nz = 1, o_nz = 4 4339 .ve 4340 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4341 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4342 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4343 34 values. 4344 4345 When d_nnz, o_nnz parameters are specified, the storage is specified 4346 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4347 In the above case the values for d_nnz,o_nnz are 4348 .vb 4349 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4350 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4351 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4352 .ve 4353 Here the space allocated is the sum of all the above values, i.e. 34, and 4354 hence the preallocation is exact.
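
   As a sketch of the corresponding call (error checking omitted, with A being a Mat declared by the
   caller), proc0 in the example above would use

$      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
$      MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);

   with proc1 and proc2 passing their own m, n, d_nnz and o_nnz values from the tables above.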
4355 4356 Level: intermediate 4357 4358 .keywords: matrix, aij, compressed row, sparse, parallel 4359 4360 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4361 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4362 @*/ 4363 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4364 { 4365 PetscErrorCode ierr; 4366 PetscMPIInt size; 4367 4368 PetscFunctionBegin; 4369 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4370 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4371 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4372 if (size > 1) { 4373 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4374 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4375 } else { 4376 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4377 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4378 } 4379 PetscFunctionReturn(0); 4380 } 4381 4382 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4383 { 4384 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4385 PetscBool flg; 4386 PetscErrorCode ierr; 4387 4388 PetscFunctionBegin; 4389 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4390 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4391 if (Ad) *Ad = a->A; 4392 if (Ao) *Ao = a->B; 4393 if (colmap) *colmap = a->garray; 4394 PetscFunctionReturn(0); 4395 } 4396 4397 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4398 { 4399 PetscErrorCode ierr; 4400 PetscInt m,N,i,rstart,nnz,Ii; 4401 PetscInt *indx; 4402 PetscScalar *values; 4403 4404 PetscFunctionBegin; 4405 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4406 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4407 PetscInt *dnz,*onz,sum,bs,cbs; 4408 4409 if (n == PETSC_DECIDE) { 4410 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4411 } 4412 /* Check sum(n) = N */ 4413 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4414 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4415 4416 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4417 rstart -= m; 4418 4419 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4420 for (i=0; i<m; i++) { 4421 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4422 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4423 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4424 } 4425 4426 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4427 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4428 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4429 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4430 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4431 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4432 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4433 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4434 } 4435 4436 /* numeric phase */ 4437 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4438 for (i=0; i<m; i++) { 4439 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4440 Ii = i + rstart; 4441 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
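    /* entries of local row i were inserted as global row Ii = i+rstart of *outmat; return the row to inmat before continuing */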
4442 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4443 } 4444 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4445 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4446 PetscFunctionReturn(0); 4447 } 4448 4449 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4450 { 4451 PetscErrorCode ierr; 4452 PetscMPIInt rank; 4453 PetscInt m,N,i,rstart,nnz; 4454 size_t len; 4455 const PetscInt *indx; 4456 PetscViewer out; 4457 char *name; 4458 Mat B; 4459 const PetscScalar *values; 4460 4461 PetscFunctionBegin; 4462 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4463 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4464 /* Should this be the type of the diagonal block of A? */ 4465 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4466 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4467 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4468 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4469 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4470 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4471 for (i=0; i<m; i++) { 4472 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4473 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4474 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4475 } 4476 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4477 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4478 4479 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4480 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4481 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4482 sprintf(name,"%s.%d",outfile,rank); 4483 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4484 ierr = PetscFree(name);CHKERRQ(ierr); 4485 ierr = MatView(B,out);CHKERRQ(ierr); 4486 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4487 ierr = MatDestroy(&B);CHKERRQ(ierr); 4488 PetscFunctionReturn(0); 4489 } 4490 4491 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4492 { 4493 PetscErrorCode ierr; 4494 Mat_Merge_SeqsToMPI *merge; 4495 PetscContainer container; 4496 4497 PetscFunctionBegin; 4498 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4499 if (container) { 4500 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4502 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4503 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4504 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4505 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4506 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4507 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4508 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4509 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4510 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4511 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4512 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4513 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4514 ierr = PetscFree(merge);CHKERRQ(ierr); 4515 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4516 } 4517 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4518 PetscFunctionReturn(0); 4519 } 4520 4521 #include <../src/mat/utils/freespace.h> 4522 #include <petscbt.h> 4523 4524 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4525 { 4526 PetscErrorCode ierr; 4527 MPI_Comm comm; 4528 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4529 PetscMPIInt 
size,rank,taga,*len_s;
  PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt             proc,m;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            *aa=a->a;
  MatScalar            **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI  *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol
== acol */ 4622 ba_i[j] += aa[nextaj++]; 4623 } 4624 } 4625 nextrow[k]++; nextai[k]++; 4626 } 4627 } 4628 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4629 } 4630 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4631 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4632 4633 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4634 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4635 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4636 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4637 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4638 PetscFunctionReturn(0); 4639 } 4640 4641 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4642 { 4643 PetscErrorCode ierr; 4644 Mat B_mpi; 4645 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4646 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4647 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4648 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4649 PetscInt len,proc,*dnz,*onz,bs,cbs; 4650 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4651 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4652 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4653 MPI_Status *status; 4654 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4655 PetscBT lnkbt; 4656 Mat_Merge_SeqsToMPI *merge; 4657 PetscContainer container; 4658 4659 PetscFunctionBegin; 4660 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4661 4662 /* make sure it is a PETSc comm */ 4663 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4664 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4665 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4666 4667 ierr = PetscNew(&merge);CHKERRQ(ierr); 4668 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4669 4670 /* determine row ownership */ 4671 /*---------------------------------------------------------*/ 4672 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4673 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4674 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4675 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4676 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4677 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4678 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4679 4680 m = merge->rowmap->n; 4681 owners = merge->rowmap->range; 4682 4683 /* determine the number of messages to send, their lengths */ 4684 /*---------------------------------------------------------*/ 4685 len_s = merge->len_s; 4686 4687 len = 0; /* length of buf_si[] */ 4688 merge->nsend = 0; 4689 for (proc=0; proc<size; proc++) { 4690 len_si[proc] = 0; 4691 if (proc == rank) { 4692 len_s[proc] = 0; 4693 } else { 4694 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4695 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4696 } 4697 if (len_s[proc]) { 4698 merge->nsend++; 4699 nrows = 0; 4700 for (i=owners[proc]; i<owners[proc+1]; i++) { 4701 if (ai[i+1] > ai[i]) nrows++; 4702 } 4703 len_si[proc] = 2*(nrows+1); 4704 len += len_si[proc]; 4705 } 4706 } 4707 4708 /* determine the number and length of messages to receive for ij-structure */ 4709 /*-------------------------------------------------------------------------*/ 4710 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4711 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4712 4713 /* post the Irecv of j-structure */ 4714 /*-------------------------------*/ 4715 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4716 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4717 4718 /* post the Isend of j-structure */ 4719 /*--------------------------------*/ 4720 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4721 4722 for (proc=0, k=0; proc<size; proc++) { 4723 if (!len_s[proc]) continue; 4724 i = owners[proc]; 4725 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4726 k++; 4727 } 4728 4729 /* receives and sends of j-structure are complete */ 4730 /*------------------------------------------------*/ 4731 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4732 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4733 4734 /* send and recv i-structure */ 4735 /*---------------------------*/ 4736 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4737 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4738 4739 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4740 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4741 for (proc=0,k=0; proc<size; proc++) { 4742 if (!len_s[proc]) continue; 4743 /* form outgoing message for i-structure: 4744 buf_si[0]: nrows to be sent 4745 [1:nrows]: row index (global) 4746 [nrows+1:2*nrows+1]: i-structure index 4747 */ 4748 /*-------------------------------------------*/ 4749 nrows = len_si[proc]/2 - 1; 4750 buf_si_i = buf_si + nrows+1; 4751 buf_si[0] = nrows; 4752 buf_si_i[0] = 0; 4753 nrows = 0; 4754 for (i=owners[proc]; i<owners[proc+1]; i++) { 4755 anzi = ai[i+1] - ai[i]; 4756 if (anzi) { 4757 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4758 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4759 nrows++; 4760 } 4761 } 4762 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4763 k++; 4764 buf_si += len_si[proc]; 4765 } 4766 4767 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4768 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4769 4770 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4771 for (i=0; i<merge->nrecv; i++) { 4772 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4773 } 4774 4775 ierr = PetscFree(len_si);CHKERRQ(ierr); 4776 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4777 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4778 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4779 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4780 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4781 ierr = PetscFree(status);CHKERRQ(ierr); 4782 4783 /* compute a local seq matrix in each processor */ 4784 /*----------------------------------------------*/ 4785 /* allocate bi array and free space for accumulating nonzero column info */ 4786 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4787 bi[0] = 0; 4788 4789 /* create and initialize a linked list */ 4790 nlnk = N+1; 4791 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4792 4793 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4794 len = ai[owners[rank+1]] - 
ai[owners[rank]];
  ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the
supporting struct to B_mpi for reuse */ 4882 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4883 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4884 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4885 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4886 *mpimat = B_mpi; 4887 4888 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4889 PetscFunctionReturn(0); 4890 } 4891 4892 /*@C 4893 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4894 matrices from each processor 4895 4896 Collective on MPI_Comm 4897 4898 Input Parameters: 4899 + comm - the communicators the parallel matrix will live on 4900 . seqmat - the input sequential matrices 4901 . m - number of local rows (or PETSC_DECIDE) 4902 . n - number of local columns (or PETSC_DECIDE) 4903 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4904 4905 Output Parameter: 4906 . mpimat - the parallel matrix generated 4907 4908 Level: advanced 4909 4910 Notes: 4911 The dimensions of the sequential matrix in each processor MUST be the same. 4912 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4913 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4914 @*/ 4915 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4916 { 4917 PetscErrorCode ierr; 4918 PetscMPIInt size; 4919 4920 PetscFunctionBegin; 4921 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4922 if (size == 1) { 4923 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4924 if (scall == MAT_INITIAL_MATRIX) { 4925 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4926 } else { 4927 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4928 } 4929 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4930 PetscFunctionReturn(0); 4931 } 4932 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4933 if (scall == MAT_INITIAL_MATRIX) { 4934 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4935 } 4936 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4937 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4938 PetscFunctionReturn(0); 4939 } 4940 4941 /*@ 4942 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4943 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4944 with MatGetSize() 4945 4946 Not Collective 4947 4948 Input Parameters: 4949 + A - the matrix 4950 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4951 4952 Output Parameter: 4953 . 
A_loc - the local sequential matrix generated 4954 4955 Level: developer 4956 4957 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4958 4959 @*/ 4960 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4961 { 4962 PetscErrorCode ierr; 4963 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4964 Mat_SeqAIJ *mat,*a,*b; 4965 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4966 MatScalar *aa,*ba,*cam; 4967 PetscScalar *ca; 4968 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4969 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4970 PetscBool match; 4971 MPI_Comm comm; 4972 PetscMPIInt size; 4973 4974 PetscFunctionBegin; 4975 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4976 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4977 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4978 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4979 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4980 4981 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4982 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4983 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4984 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4985 aa = a->a; ba = b->a; 4986 if (scall == MAT_INITIAL_MATRIX) { 4987 if (size == 1) { 4988 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4989 PetscFunctionReturn(0); 4990 } 4991 4992 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4993 ci[0] = 0; 4994 for (i=0; i<am; i++) { 4995 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4996 } 4997 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4998 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4999 k = 0; 5000 for (i=0; i<am; i++) { 5001 ncols_o = bi[i+1] - bi[i]; 5002 ncols_d = ai[i+1] - ai[i]; 5003 /* off-diagonal portion of A */ 5004 for (jo=0; jo<ncols_o; jo++) { 5005 col = cmap[*bj]; 5006 if (col >= cstart) break; 5007 cj[k] = col; bj++; 5008 ca[k++] = *ba++; 5009 } 5010 /* diagonal portion of A */ 5011 for (j=0; j<ncols_d; j++) { 5012 cj[k] = cstart + *aj++; 5013 ca[k++] = *aa++; 5014 } 5015 /* off-diagonal portion of A */ 5016 for (j=jo; j<ncols_o; j++) { 5017 cj[k] = cmap[*bj++]; 5018 ca[k++] = *ba++; 5019 } 5020 } 5021 /* put together the new matrix */ 5022 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5023 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5024 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5025 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5026 mat->free_a = PETSC_TRUE; 5027 mat->free_ij = PETSC_TRUE; 5028 mat->nonew = 0; 5029 } else if (scall == MAT_REUSE_MATRIX) { 5030 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5031 ci = mat->i; cj = mat->j; cam = mat->a; 5032 for (i=0; i<am; i++) { 5033 /* off-diagonal portion of A */ 5034 ncols_o = bi[i+1] - bi[i]; 5035 for (jo=0; jo<ncols_o; jo++) { 5036 col = cmap[*bj]; 5037 if (col >= cstart) break; 5038 *cam++ = *ba++; bj++; 5039 } 5040 /* diagonal portion of A */ 5041 ncols_d = ai[i+1] - ai[i]; 5042 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5043 /* off-diagonal portion of A */ 5044 for (j=jo; j<ncols_o; j++) { 5045 *cam++ = *ba++; bj++; 5046 } 5047 } 5048 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5049 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5050 PetscFunctionReturn(0); 5051 } 5052 5053 /*@C 5054 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5055 5056 Not Collective 5057 5058 Input Parameters: 5059 + A - the matrix 5060 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5061 - row, col - index sets of rows and columns to extract (or NULL) 5062 5063 Output Parameter: 5064 . A_loc - the local sequential matrix generated 5065 5066 Level: developer 5067 5068 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5069 5070 @*/ 5071 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5072 { 5073 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5074 PetscErrorCode ierr; 5075 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5076 IS isrowa,iscola; 5077 Mat *aloc; 5078 PetscBool match; 5079 5080 PetscFunctionBegin; 5081 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5082 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5083 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5084 if (!row) { 5085 start = A->rmap->rstart; end = A->rmap->rend; 5086 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5087 } else { 5088 isrowa = *row; 5089 } 5090 if (!col) { 5091 start = A->cmap->rstart; 5092 cmap = a->garray; 5093 nzA = a->A->cmap->n; 5094 nzB = a->B->cmap->n; 5095 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5096 ncols = 0; 5097 for (i=0; i<nzB; i++) { 5098 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5099 else break; 5100 } 5101 imark = i; 5102 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5103 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5104 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5105 } else { 5106 iscola = *col; 5107 } 5108 if (scall != MAT_INITIAL_MATRIX) { 5109 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5110 aloc[0] = *A_loc; 5111 } 5112 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5113 if (!col) { /* attach global id of condensed columns */ 5114 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5115 } 5116 *A_loc = aloc[0]; 5117 ierr = PetscFree(aloc);CHKERRQ(ierr); 5118 if (!row) { 5119 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5120 } 5121 if (!col) { 5122 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5123 } 5124 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5125 PetscFunctionReturn(0); 5126 } 5127 5128 /*@C 5129 MatGetBrowsOfAcols - Creates a 
SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5130 5131 Collective on Mat 5132 5133 Input Parameters: 5134 + A,B - the matrices in mpiaij format 5135 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5136 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5137 5138 Output Parameter: 5139 + rowb, colb - index sets of rows and columns of B to extract 5140 - B_seq - the sequential matrix generated 5141 5142 Level: developer 5143 5144 @*/ 5145 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5146 { 5147 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5148 PetscErrorCode ierr; 5149 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5150 IS isrowb,iscolb; 5151 Mat *bseq=NULL; 5152 5153 PetscFunctionBegin; 5154 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5155 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5156 } 5157 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5158 5159 if (scall == MAT_INITIAL_MATRIX) { 5160 start = A->cmap->rstart; 5161 cmap = a->garray; 5162 nzA = a->A->cmap->n; 5163 nzB = a->B->cmap->n; 5164 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5165 ncols = 0; 5166 for (i=0; i<nzB; i++) { /* row < local row index */ 5167 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5168 else break; 5169 } 5170 imark = i; 5171 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5172 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5173 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5174 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5175 } else { 5176 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5177 isrowb = *rowb; iscolb = *colb; 5178 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5179 bseq[0] = *B_seq; 5180 } 5181 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5182 *B_seq = bseq[0]; 5183 ierr = PetscFree(bseq);CHKERRQ(ierr); 5184 if (!rowb) { 5185 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5186 } else { 5187 *rowb = isrowb; 5188 } 5189 if (!colb) { 5190 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5191 } else { 5192 *colb = iscolb; 5193 } 5194 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5195 PetscFunctionReturn(0); 5196 } 5197 5198 /* 5199 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5200 of the OFF-DIAGONAL portion of local A 5201 5202 Collective on Mat 5203 5204 Input Parameters: 5205 + A,B - the matrices in mpiaij format 5206 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5207 5208 Output Parameter: 5209 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5210 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5211 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5212 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5213 5214 Level: developer 5215 5216 */ 5217 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5218 { 5219 VecScatter_MPI_General *gen_to,*gen_from; 5220 PetscErrorCode ierr; 5221 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5222 Mat_SeqAIJ *b_oth; 5223 VecScatter ctx; 5224 MPI_Comm comm; 5225 PetscMPIInt *rprocs,*sprocs,tag,rank; 5226 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5227 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5228 PetscScalar *b_otha,*bufa,*bufA,*vals; 5229 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5230 MPI_Request *rwaits = NULL,*swaits = NULL; 5231 MPI_Status *sstatus,rstatus; 5232 PetscMPIInt jj,size; 5233 VecScatterType type; 5234 PetscBool mpi1; 5235 5236 PetscFunctionBegin; 5237 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5238 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5239 5240 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5241 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5242 } 5243 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5244 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5245 5246 if (size == 1) { 5247 startsj_s = NULL; 5248 bufa_ptr = NULL; 5249 *B_oth = NULL; 5250 PetscFunctionReturn(0); 5251 } 5252 5253 ctx = a->Mvctx; 5254 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5255 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5256 if (!mpi1) { 5257 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5258 thus create a->Mvctx_mpi1 */ 5259 if (!a->Mvctx_mpi1) { 5260 a->Mvctx_mpi1_flg = PETSC_TRUE; 5261 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5262 } 5263 ctx = a->Mvctx_mpi1; 5264 } 5265 tag = ((PetscObject)ctx)->tag; 5266 5267 gen_to = (VecScatter_MPI_General*)ctx->todata; 5268 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5269 nrecvs = gen_from->n; 5270 nsends = gen_to->n; 5271 5272 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5273 srow = gen_to->indices; /* local row index to be sent */ 5274 sstarts = gen_to->starts; 5275 sprocs = gen_to->procs; 5276 sstatus = gen_to->sstatus; 5277 sbs = gen_to->bs; 5278 rstarts = gen_from->starts; 5279 rprocs = gen_from->procs; 5280 rbs = gen_from->bs; 5281 5282 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5283 if (scall == MAT_INITIAL_MATRIX) { 5284 /* i-array */ 5285 /*---------*/ 5286 /* post receives */ 5287 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5288 for (i=0; i<nrecvs; i++) { 5289 rowlen = rvalues + rstarts[i]*rbs; 5290 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5291 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5292 } 5293 5294 /* pack the outgoing message */ 5295 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5296 5297 sstartsj[0] = 0; 5298 rstartsj[0] = 0; 5299 len = 0; /* total length of j or a array to be sent */ 5300 k = 0; 5301 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5302 for (i=0; i<nsends; i++) { 5303 rowlen = svalues + sstarts[i]*sbs; 5304 
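      /* rowlen[] (the slice of svalues for message i) records, for each (block) row sent to sprocs[i], the number of nonzeros in that row of B */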
nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
    ierr = PetscFree(svalues);CHKERRQ(ierr);

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    ierr = PetscFree(rvalues);CHKERRQ(ierr);

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length
of the msg received */ 5400 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5401 } 5402 5403 /* pack the outgoing message a-array */ 5404 k = 0; 5405 for (i=0; i<nsends; i++) { 5406 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5407 bufA = bufa+sstartsj[i]; 5408 for (j=0; j<nrows; j++) { 5409 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5410 for (ll=0; ll<sbs; ll++) { 5411 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5412 for (l=0; l<ncols; l++) { 5413 *bufA++ = vals[l]; 5414 } 5415 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5416 } 5417 } 5418 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5419 } 5420 /* recvs and sends of a-array are completed */ 5421 i = nrecvs; 5422 while (i--) { 5423 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5424 } 5425 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5426 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5427 5428 if (scall == MAT_INITIAL_MATRIX) { 5429 /* put together the new matrix */ 5430 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5431 5432 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5433 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5434 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5435 b_oth->free_a = PETSC_TRUE; 5436 b_oth->free_ij = PETSC_TRUE; 5437 b_oth->nonew = 0; 5438 5439 ierr = PetscFree(bufj);CHKERRQ(ierr); 5440 if (!startsj_s || !bufa_ptr) { 5441 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5442 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5443 } else { 5444 *startsj_s = sstartsj; 5445 *startsj_r = rstartsj; 5446 *bufa_ptr = bufa; 5447 } 5448 } 5449 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5450 PetscFunctionReturn(0); 5451 } 5452 5453 /*@C 5454 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5455 5456 Not Collective 5457 5458 Input Parameters: 5459 . A - The matrix in mpiaij format 5460 5461 Output Parameter: 5462 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5463 . 
colmap - A map from global column index to local index into lvec 5464 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5465 5466 Level: developer 5467 5468 @*/ 5469 #if defined(PETSC_USE_CTABLE) 5470 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5471 #else 5472 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5473 #endif 5474 { 5475 Mat_MPIAIJ *a; 5476 5477 PetscFunctionBegin; 5478 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5479 PetscValidPointer(lvec, 2); 5480 PetscValidPointer(colmap, 3); 5481 PetscValidPointer(multScatter, 4); 5482 a = (Mat_MPIAIJ*) A->data; 5483 if (lvec) *lvec = a->lvec; 5484 if (colmap) *colmap = a->colmap; 5485 if (multScatter) *multScatter = a->Mvctx; 5486 PetscFunctionReturn(0); 5487 } 5488 5489 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5490 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5491 #if defined(PETSC_HAVE_MKL_SPARSE) 5492 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5493 #endif 5494 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5495 #if defined(PETSC_HAVE_ELEMENTAL) 5496 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5497 #endif 5498 #if defined(PETSC_HAVE_HYPRE) 5499 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5500 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5501 #endif 5502 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5503 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5504 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5505 5506 /* 5507 Computes (B'*A')' since computing B*A directly is untenable 5508 5509 n p p 5510 ( ) ( ) ( ) 5511 m ( A ) * n ( B ) = m ( C ) 5512 ( ) ( ) ( ) 5513 5514 */ 5515 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5516 { 5517 PetscErrorCode ierr; 5518 Mat At,Bt,Ct; 5519 5520 PetscFunctionBegin; 5521 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5522 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5523 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5524 ierr = MatDestroy(&At);CHKERRQ(ierr); 5525 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5526 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5527 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5528 PetscFunctionReturn(0); 5529 } 5530 5531 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5532 { 5533 PetscErrorCode ierr; 5534 PetscInt m=A->rmap->n,n=B->cmap->n; 5535 Mat Cmat; 5536 5537 PetscFunctionBegin; 5538 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5539 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5540 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5541 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5542 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5543 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5544 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5545 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5546 5547 Cmat->ops->matmultnumeric = 
MatMatMultNumeric_MPIDense_MPIAIJ; 5548 5549 *C = Cmat; 5550 PetscFunctionReturn(0); 5551 } 5552 5553 /* ----------------------------------------------------------------*/ 5554 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5555 { 5556 PetscErrorCode ierr; 5557 5558 PetscFunctionBegin; 5559 if (scall == MAT_INITIAL_MATRIX) { 5560 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5561 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5562 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5563 } 5564 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5565 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5566 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5567 PetscFunctionReturn(0); 5568 } 5569 5570 /*MC 5571 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5572 5573 Options Database Keys: 5574 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5575 5576 Level: beginner 5577 5578 .seealso: MatCreateAIJ() 5579 M*/ 5580 5581 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5582 { 5583 Mat_MPIAIJ *b; 5584 PetscErrorCode ierr; 5585 PetscMPIInt size; 5586 5587 PetscFunctionBegin; 5588 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5589 5590 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5591 B->data = (void*)b; 5592 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5593 B->assembled = PETSC_FALSE; 5594 B->insertmode = NOT_SET_VALUES; 5595 b->size = size; 5596 5597 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5598 5599 /* build cache for off array entries formed */ 5600 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5601 5602 b->donotstash = PETSC_FALSE; 5603 b->colmap = 0; 5604 b->garray = 0; 5605 b->roworiented = PETSC_TRUE; 5606 5607 /* stuff used for matrix vector multiply */ 5608 b->lvec = NULL; 5609 b->Mvctx = NULL; 5610 5611 /* stuff for MatGetRow() */ 5612 b->rowindices = 0; 5613 b->rowvalues = 0; 5614 b->getrowactive = PETSC_FALSE; 5615 5616 /* flexible pointer used in CUSP/CUSPARSE classes */ 5617 b->spptr = NULL; 5618 5619 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5620 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5621 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5622 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5623 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5624 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5625 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5626 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5627 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5628 #if defined(PETSC_HAVE_MKL_SPARSE) 5629 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5630 #endif 5631 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5632 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5633 #if defined(PETSC_HAVE_ELEMENTAL) 5634 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5635 #endif 5636 #if defined(PETSC_HAVE_HYPRE) 5637 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5638 #endif 5639 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5640 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5641 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5642 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5643 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5644 #if defined(PETSC_HAVE_HYPRE) 5645 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5646 #endif 5647 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5648 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5649 PetscFunctionReturn(0); 5650 } 5651 5652 /*@C 5653 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5654 and "off-diagonal" part of the matrix in CSR format. 5655 5656 Collective on MPI_Comm 5657 5658 Input Parameters: 5659 + comm - MPI communicator 5660 . m - number of local rows (Cannot be PETSC_DECIDE) 5661 . n - This value should be the same as the local size used in creating the 5662 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5663 calculated if N is given) For square matrices n is almost always m. 5664 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5665 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5666 . i - row indices for "diagonal" portion of matrix 5667 . j - column indices 5668 . a - matrix values 5669 . oi - row indices for "off-diagonal" portion of matrix 5670 . oj - column indices 5671 - oa - matrix values 5672 5673 Output Parameter: 5674 . mat - the matrix 5675 5676 Level: advanced 5677 5678 Notes: 5679 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5680 must free the arrays once the matrix has been destroyed and not before. 5681 5682 The i and j indices are 0 based 5683 5684 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5685 5686 This sets local rows and cannot be used to set off-processor values. 5687 5688 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5689 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5690 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 5691 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5692 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5693 communication if it is known that only local entries will be set. 5694 5695 .keywords: matrix, aij, compressed row, sparse, parallel 5696 5697 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5698 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5699 @*/ 5700 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5701 { 5702 PetscErrorCode ierr; 5703 Mat_MPIAIJ *maij; 5704 5705 PetscFunctionBegin; 5706 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5707 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5708 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5709 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5710 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5711 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5712 maij = (Mat_MPIAIJ*) (*mat)->data; 5713 5714 (*mat)->preallocated = PETSC_TRUE; 5715 5716 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5717 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5718 5719 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5720 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5721 5722 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5723 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5724 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5725 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5726 5727 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5728 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5729 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5730 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5731 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5732 PetscFunctionReturn(0); 5733 } 5734 5735 /* 5736 Special version for direct calls from Fortran 5737 */ 5738 #include <petsc/private/fortranimpl.h> 5739 5740 /* Change these macros so can be used in void function */ 5741 #undef CHKERRQ 5742 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5743 #undef SETERRQ2 5744 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5745 #undef SETERRQ3 5746 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5747 #undef SETERRQ 5748 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5749 5750 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5751 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5752 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5753 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5754 #else 5755 #endif 5756 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5757 { 5758 Mat mat = *mmat; 5759 PetscInt m = *mm, n = *mn; 5760 InsertMode addv = 
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      /* rows owned by this process are inserted directly into the local diagonal (A) or off-diagonal (B) block */
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          /* global column lies in this process's diagonal block */
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      /* off-process rows are stashed and communicated to their owners during MatAssemblyBegin/End() */
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}