1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/isimpl.h> 6 #include <petscblaslapack.h> 7 #include <petscsf.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 48 { 49 PetscErrorCode ierr; 50 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 51 52 PetscFunctionBegin; 53 if (mat->A) { 54 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 55 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 56 } 57 PetscFunctionReturn(0); 58 } 59 60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 61 { 62 PetscErrorCode ierr; 63 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 64 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 65 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 66 const PetscInt *ia,*ib; 67 const MatScalar *aa,*bb; 68 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 69 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 70 71 PetscFunctionBegin; 72 *keptrows = 0; 73 ia = a->i; 74 ib = b->i; 75 for (i=0; i<m; i++) { 76 na = ia[i+1] - ia[i]; 77 nb = ib[i+1] - ib[i]; 78 if (!na && !nb) { 79 cnt++; 80 goto ok1; 81 } 82 aa = a->a + ia[i]; 83 for (j=0; j<na; j++) { 84 if (aa[j] != 0.0) goto ok1; 85 } 86 bb = b->a + ib[i]; 87 for (j=0; j <nb; j++) { 88 if (bb[j] != 0.0) goto ok1; 89 } 90 cnt++; 91 ok1:; 92 } 93 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 94 if (!n0rows) PetscFunctionReturn(0); 95 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 96 cnt = 0; 97 for (i=0; i<m; i++) { 98 na = ia[i+1] - ia[i]; 99 nb = ib[i+1] - ib[i]; 100 if (!na && !nb) continue; 101 aa = a->a + ia[i]; 102 for (j=0; j<na;j++) { 103 if (aa[j] != 0.0) { 104 rows[cnt++] = rstart + i; 105 goto ok2; 106 } 107 } 108 bb = b->a + ib[i]; 109 for (j=0; j<nb; j++) { 110 if (bb[j] != 0.0) { 111 rows[cnt++] = rstart + i; 112 goto ok2; 113 } 114 } 115 ok2:; 116 } 117 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 122 { 123 PetscErrorCode ierr; 124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 125 PetscBool cong; 126 127 PetscFunctionBegin; 128 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 129 if (Y->assembled && cong) { 130 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 131 } else { 132 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 133 } 134 PetscFunctionReturn(0); 135 } 136 137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 138 { 139 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 140 PetscErrorCode ierr; 141 PetscInt i,rstart,nrows,*rows; 142 143 PetscFunctionBegin; 144 *zrows = NULL; 145 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 146 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 147 for (i=0; i<nrows; i++) rows[i] += rstart; 148 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 153 { 154 PetscErrorCode ierr; 155 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 156 PetscInt i,n,*garray = aij->garray; 157 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 158 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 159 PetscReal *work; 160 161 PetscFunctionBegin; 162 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 163 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 164 if (type == NORM_2) { 165 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 166 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 167 } 168 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 169 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 170 } 171 } else if (type == NORM_1) { 172 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 173 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 174 } 175 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 176 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 177 } 178 } else if (type == NORM_INFINITY) { 179 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 180 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 181 } 182 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 183 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 184 } 185 186 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 187 if (type == NORM_INFINITY) { 188 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 189 } else { 190 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 191 } 192 ierr = PetscFree(work);CHKERRQ(ierr); 193 if (type == NORM_2) { 194 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 195 } 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 200 { 201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 202 IS sis,gis; 203 PetscErrorCode ierr; 204 const PetscInt *isis,*igis; 205 PetscInt n,*iis,nsis,ngis,rstart,i; 206 207 PetscFunctionBegin; 208 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 209 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 210 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 211 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 212 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 213 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 214 215 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 216 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 217 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 218 n = ngis + nsis; 219 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 220 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 221 for (i=0; i<n; i++) iis[i] += rstart; 222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 223 224 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 225 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 226 ierr = ISDestroy(&sis);CHKERRQ(ierr); 227 ierr = ISDestroy(&gis);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 /* 232 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 233 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 234 235 Only for square matrices 236 237 Used by a preconditioner, hence PETSC_EXTERN 238 */ 239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 240 { 241 PetscMPIInt rank,size; 242 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 243 PetscErrorCode ierr; 244 Mat mat; 245 Mat_SeqAIJ *gmata; 246 PetscMPIInt tag; 247 MPI_Status status; 248 PetscBool aij; 249 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 254 if (!rank) { 255 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 256 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 257 } 258 if (reuse == MAT_INITIAL_MATRIX) { 259 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 260 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 261 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 262 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 263 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 264 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 265 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 266 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 267 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 268 269 rowners[0] = 0; 270 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 271 rstart = rowners[rank]; 272 rend = rowners[rank+1]; 273 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 274 if (!rank) { 275 gmata = (Mat_SeqAIJ*) gmat->data; 276 /* send row lengths to all processors */ 277 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 278 for (i=1; i<size; i++) { 279 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 280 } 281 /* determine number diagonal and off-diagonal counts */ 282 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 283 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 284 jj = 0; 285 for (i=0; i<m; i++) { 286 for (j=0; j<dlens[i]; j++) { 287 if (gmata->j[jj] < rstart) ld[i]++; 288 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 289 jj++; 290 } 291 } 292 /* send column indices to other processes */ 293 for (i=1; i<size; i++) { 294 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 295 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 296 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 } 298 299 /* send numerical values to other processes */ 300 for (i=1; i<size; i++) { 301 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 302 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 303 } 304 gmataa = gmata->a; 305 gmataj = gmata->j; 306 307 } else { 308 /* receive row lengths */ 309 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* receive column indices */ 311 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 312 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 313 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 314 /* determine number diagonal and off-diagonal counts */ 315 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 316 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 317 jj = 0; 318 for (i=0; i<m; i++) { 319 for (j=0; j<dlens[i]; j++) { 320 if (gmataj[jj] < rstart) ld[i]++; 321 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 322 jj++; 323 } 324 } 325 /* receive numerical values */ 326 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 327 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 328 } 329 /* set preallocation */ 330 for (i=0; i<m; i++) { 331 dlens[i] -= olens[i]; 332 } 333 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 334 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 335 336 for (i=0; i<m; i++) { 337 dlens[i] += olens[i]; 338 } 339 cnt = 0; 340 for (i=0; i<m; i++) { 341 row = rstart + i; 342 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 343 cnt += dlens[i]; 344 } 345 if (rank) { 346 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 347 } 348 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 349 ierr = PetscFree(rowners);CHKERRQ(ierr); 350 351 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 352 353 *inmat = mat; 354 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 355 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 356 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 357 mat = *inmat; 358 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 359 if (!rank) { 360 /* send numerical values to other processes */ 361 gmata = (Mat_SeqAIJ*) gmat->data; 362 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 363 gmataa = gmata->a; 364 for (i=1; i<size; i++) { 365 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 366 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 367 } 368 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 369 } else { 370 /* receive numerical values from process 0*/ 371 nz = Ad->nz + Ao->nz; 372 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 373 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 374 } 375 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 376 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 377 ad = Ad->a; 378 ao = Ao->a; 379 if (mat->rmap->n) { 380 i = 0; 381 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 for (i=1; i<mat->rmap->n; i++) { 385 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 386 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 387 } 388 i--; 389 if (mat->rmap->n) { 390 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 391 } 392 if (rank) { 393 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 394 } 395 } 396 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 398 PetscFunctionReturn(0); 399 } 400 401 /* 402 Local utility routine that creates a mapping from the global column 403 number to the local number in the off-diagonal part of the local 404 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 405 a slightly higher hash table cost; without it it is not scalable (each processor 406 has an order N integer array but is fast to acess. 407 */ 408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt n = aij->B->cmap->n,i; 413 414 PetscFunctionBegin; 415 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 416 #if defined(PETSC_USE_CTABLE) 417 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 418 for (i=0; i<n; i++) { 419 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 420 } 421 #else 422 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 423 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 424 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 425 #endif 426 PetscFunctionReturn(0); 427 } 428 429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 430 { \ 431 if (col <= lastcol1) low1 = 0; \ 432 else high1 = nrow1; \ 433 lastcol1 = col;\ 434 while (high1-low1 > 5) { \ 435 t = (low1+high1)/2; \ 436 if (rp1[t] > col) high1 = t; \ 437 else low1 = t; \ 438 } \ 439 for (_i=low1; _i<high1; _i++) { \ 440 if (rp1[_i] > col) break; \ 441 if (rp1[_i] == col) { \ 442 if (addv == ADD_VALUES) ap1[_i] += value; \ 443 else ap1[_i] = value; \ 444 goto a_noinsert; \ 445 } \ 446 } \ 447 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 448 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 449 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 450 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 451 N = nrow1++ - 1; a->nz++; high1++; \ 452 /* shift up all the later entries in this row */ \ 453 for (ii=N; ii>=_i; ii--) { \ 454 rp1[ii+1] = rp1[ii]; \ 455 ap1[ii+1] = ap1[ii]; \ 456 } \ 457 rp1[_i] = col; \ 458 ap1[_i] = value; \ 459 A->nonzerostate++;\ 460 a_noinsert: ; \ 461 ailen[row] = nrow1; \ 462 } 463 464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 465 { \ 466 if (col <= lastcol2) low2 = 0; \ 467 else high2 = nrow2; \ 468 lastcol2 = col; \ 469 while (high2-low2 > 5) { \ 470 t = (low2+high2)/2; \ 471 if (rp2[t] > col) high2 = t; \ 472 else low2 = t; \ 473 } \ 474 for (_i=low2; _i<high2; _i++) { \ 475 if (rp2[_i] > col) break; \ 476 if (rp2[_i] == col) { \ 477 if (addv == ADD_VALUES) ap2[_i] += value; \ 478 else ap2[_i] = value; \ 479 goto b_noinsert; \ 480 } \ 481 } \ 482 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 484 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 485 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 486 N = nrow2++ - 1; b->nz++; high2++; \ 487 /* shift up all the later entries in this row */ \ 488 for (ii=N; ii>=_i; ii--) { \ 489 rp2[ii+1] = rp2[ii]; \ 490 ap2[ii+1] = ap2[ii]; \ 491 } \ 492 rp2[_i] = col; \ 493 ap2[_i] = value; \ 494 B->nonzerostate++; \ 495 b_noinsert: ; \ 496 bilen[row] = nrow2; \ 497 } 498 499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 500 { 501 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 502 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 503 PetscErrorCode ierr; 504 PetscInt l,*garray = mat->garray,diag; 505 506 PetscFunctionBegin; 507 /* code only works for square matrices A */ 508 509 /* find size of row to the left of the diagonal part */ 510 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 511 row = row - diag; 512 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 513 if (garray[b->j[b->i[row]+l]] > diag) break; 514 } 515 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* diagonal part */ 518 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 519 520 /* right of diagonal part */ 521 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 526 { 527 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 528 PetscScalar value; 529 PetscErrorCode ierr; 530 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 531 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 532 PetscBool roworiented = aij->roworiented; 533 534 /* Some Variables required in the macro */ 535 Mat A = aij->A; 536 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 537 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 538 MatScalar *aa = a->a; 539 PetscBool ignorezeroentries = a->ignorezeroentries; 540 Mat B = aij->B; 541 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 542 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 543 MatScalar *ba = b->a; 544 545 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 546 PetscInt nonew; 547 MatScalar *ap1,*ap2; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (im[i] < 0) continue; 552 #if defined(PETSC_USE_DEBUG) 553 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 554 #endif 555 if (im[i] >= rstart && im[i] < rend) { 556 row = im[i] - rstart; 557 lastcol1 = -1; 558 rp1 = aj + ai[row]; 559 ap1 = aa + ai[row]; 560 rmax1 = aimax[row]; 561 nrow1 = ailen[row]; 562 low1 = 0; 563 high1 = nrow1; 564 lastcol2 = -1; 565 rp2 = bj + bi[row]; 566 ap2 = ba + bi[row]; 567 rmax2 = bimax[row]; 568 nrow2 = bilen[row]; 569 low2 = 0; 570 high2 = nrow2; 571 572 for (j=0; j<n; j++) { 573 if (roworiented) value = v[i*n+j]; 574 else value = v[i+j*m]; 575 if (in[j] >= cstart && in[j] < cend) { 576 col = in[j] - cstart; 577 nonew = a->nonew; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 579 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 580 } else if (in[j] < 0) continue; 581 #if defined(PETSC_USE_DEBUG) 582 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 583 #endif 584 else { 585 if (mat->was_assembled) { 586 if (!aij->colmap) { 587 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 #if defined(PETSC_USE_CTABLE) 590 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 591 col--; 592 #else 593 col = aij->colmap[in[j]] - 1; 594 #endif 595 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 596 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 597 col = in[j]; 598 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 599 B = aij->B; 600 b = (Mat_SeqAIJ*)B->data; 601 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 602 rp2 = bj + bi[row]; 603 ap2 = ba + bi[row]; 604 rmax2 = bimax[row]; 605 nrow2 = bilen[row]; 606 low2 = 0; 607 high2 = nrow2; 608 bm = aij->B->rmap->n; 609 ba = b->a; 610 } else if (col < 0) { 611 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 612 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 613 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } 615 } else col = in[j]; 616 nonew = b->nonew; 617 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 618 } 619 } 620 } else { 621 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 622 if (!aij->donotstash) { 623 mat->assembled = PETSC_FALSE; 624 if (roworiented) { 625 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 626 } else { 627 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 628 } 629 } 630 } 631 } 632 PetscFunctionReturn(0); 633 } 634 635 PetscErrorCode MatSetValues_MPIAIJ_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscInt *dnz, const PetscInt *onz) 636 { 637 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 638 Mat A = aij->A; /* diagonal part of the matrix */ 639 Mat B = aij->B; /* offdiagonal part of the matrix */ 640 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 641 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 642 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 643 PetscInt *ailen = a->ilen,*aj = a->j; 644 PetscInt *bilen = b->ilen,*bj = b->j; 645 PetscInt am = aij->A->rmap->n,i,j; 646 PetscInt col, diag_so_far=0, offd_so_far=0; 647 648 PetscFunctionBegin; 649 /* Iterate over all rows of the matrix */ 650 for (j=0; j<am; j++) { 651 for (i=0; i<dnz[j]+onz[j]; i++) { 652 col = i + mat_i[j]; 653 /* If column is in the diagonal */ 654 if (mat_j[col] >= cstart && mat_j[col] < cend) { 655 aj[diag_so_far++] = mat_j[col] - cstart; 656 } else { /* off-diagonal entries */ 657 bj[offd_so_far++] = mat_j[col]; 658 } 659 } 660 ailen[j] = dnz[j]; 661 bilen[j] = onz[j]; 662 } 663 PetscFunctionReturn(0); 664 } 665 666 667 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 668 { 669 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 670 PetscErrorCode ierr; 671 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 672 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 673 674 PetscFunctionBegin; 675 for (i=0; i<m; i++) { 676 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 677 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 678 if (idxm[i] >= rstart && idxm[i] < rend) { 679 row = idxm[i] - rstart; 680 for (j=0; j<n; j++) { 681 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 682 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 683 if (idxn[j] >= cstart && idxn[j] < cend) { 684 col = idxn[j] - cstart; 685 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 686 } else { 687 if (!aij->colmap) { 688 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 689 } 690 #if defined(PETSC_USE_CTABLE) 691 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 692 col--; 693 #else 694 col = aij->colmap[idxn[j]] - 1; 695 #endif 696 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 697 else { 698 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 699 } 700 } 701 } 702 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 703 } 704 PetscFunctionReturn(0); 705 } 706 707 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 708 709 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 710 { 711 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 712 PetscErrorCode ierr; 713 PetscInt nstash,reallocs; 714 715 PetscFunctionBegin; 716 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 717 718 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 719 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 720 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 721 PetscFunctionReturn(0); 722 } 723 724 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 725 { 726 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 727 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 728 PetscErrorCode ierr; 729 PetscMPIInt n; 730 PetscInt i,j,rstart,ncols,flg; 731 PetscInt *row,*col; 732 PetscBool other_disassembled; 733 PetscScalar *val; 734 735 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 736 737 PetscFunctionBegin; 738 if (!aij->donotstash && !mat->nooffprocentries) { 739 while (1) { 740 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 741 if (!flg) break; 742 743 for (i=0; i<n; ) { 744 /* Now identify the consecutive vals belonging to the same row */ 745 for (j=i,rstart=row[j]; j<n; j++) { 746 if (row[j] != rstart) break; 747 } 748 if (j < n) ncols = j-i; 749 else ncols = n-i; 750 /* Now assemble all these values with a single function call */ 751 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 752 753 i = j; 754 } 755 } 756 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 757 } 758 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 759 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 760 761 /* determine if any processor has disassembled, if so we must 762 also disassemble ourselfs, in order that we may reassemble. */ 763 /* 764 if nonzero structure of submatrix B cannot change then we know that 765 no processor disassembled thus we can skip this stuff 766 */ 767 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 768 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 769 if (mat->was_assembled && !other_disassembled) { 770 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 771 } 772 } 773 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 774 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 775 } 776 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 777 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 778 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 779 780 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 781 782 aij->rowvalues = 0; 783 784 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 785 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 786 787 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 788 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 789 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 790 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 791 } 792 PetscFunctionReturn(0); 793 } 794 795 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 796 { 797 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 798 PetscErrorCode ierr; 799 800 PetscFunctionBegin; 801 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 802 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 803 PetscFunctionReturn(0); 804 } 805 806 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 807 { 808 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 809 PetscInt *lrows; 810 PetscInt r, len; 811 PetscBool cong; 812 PetscErrorCode ierr; 813 814 PetscFunctionBegin; 815 /* get locally owned rows */ 816 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 817 /* fix right hand side if needed */ 818 if (x && b) { 819 const PetscScalar *xx; 820 PetscScalar *bb; 821 822 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 823 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 824 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 825 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 826 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 827 } 828 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 829 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 830 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 831 if ((diag != 0.0) && cong) { 832 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 833 } else if (diag != 0.0) { 834 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 835 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 836 for (r = 0; r < len; ++r) { 837 const PetscInt row = lrows[r] + A->rmap->rstart; 838 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 839 } 840 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 841 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 842 } else { 843 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 844 } 845 ierr = PetscFree(lrows);CHKERRQ(ierr); 846 847 /* only change matrix nonzero state if pattern was allowed to be changed */ 848 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 849 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 850 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 851 } 852 PetscFunctionReturn(0); 853 } 854 855 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 856 { 857 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 858 PetscErrorCode ierr; 859 PetscMPIInt n = A->rmap->n; 860 PetscInt i,j,r,m,p = 0,len = 0; 861 PetscInt *lrows,*owners = A->rmap->range; 862 PetscSFNode *rrows; 863 PetscSF sf; 864 const PetscScalar *xx; 865 PetscScalar *bb,*mask; 866 Vec xmask,lmask; 867 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 868 const PetscInt *aj, *ii,*ridx; 869 PetscScalar *aa; 870 871 PetscFunctionBegin; 872 /* Create SF where leaves are input rows and roots are owned rows */ 873 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 874 for (r = 0; r < n; ++r) lrows[r] = -1; 875 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 876 for (r = 0; r < N; ++r) { 877 const PetscInt idx = rows[r]; 878 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 879 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 880 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 881 } 882 rrows[r].rank = p; 883 rrows[r].index = rows[r] - owners[p]; 884 } 885 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 886 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 887 /* Collect flags for rows to be zeroed */ 888 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 889 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 890 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 891 /* Compress and put in row numbers */ 892 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 893 /* zero diagonal part of matrix */ 894 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 895 /* handle off diagonal part of matrix */ 896 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 897 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 898 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 899 for (i=0; i<len; i++) bb[lrows[i]] = 1; 900 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 901 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 902 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 903 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 904 if (x) { 905 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 907 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 908 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 909 } 910 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 911 /* remove zeroed rows of off diagonal matrix */ 912 ii = aij->i; 913 for (i=0; i<len; i++) { 914 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 915 } 916 /* loop over all elements of off process part of matrix zeroing removed columns*/ 917 if (aij->compressedrow.use) { 918 m = aij->compressedrow.nrows; 919 ii = aij->compressedrow.i; 920 ridx = aij->compressedrow.rindex; 921 for (i=0; i<m; i++) { 922 n = ii[i+1] - ii[i]; 923 aj = aij->j + ii[i]; 924 aa = aij->a + ii[i]; 925 926 for (j=0; j<n; j++) { 927 if (PetscAbsScalar(mask[*aj])) { 928 if (b) bb[*ridx] -= *aa*xx[*aj]; 929 *aa = 0.0; 930 } 931 aa++; 932 aj++; 933 } 934 ridx++; 935 } 936 } else { /* do not use compressed row format */ 937 m = l->B->rmap->n; 938 for (i=0; i<m; i++) { 939 n = ii[i+1] - ii[i]; 940 aj = aij->j + ii[i]; 941 aa = aij->a + ii[i]; 942 for (j=0; j<n; j++) { 943 if (PetscAbsScalar(mask[*aj])) { 944 if (b) bb[i] -= *aa*xx[*aj]; 945 *aa = 0.0; 946 } 947 aa++; 948 aj++; 949 } 950 } 951 } 952 if (x) { 953 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 954 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 955 } 956 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 957 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 958 ierr = PetscFree(lrows);CHKERRQ(ierr); 959 960 /* only change matrix nonzero state if pattern was allowed to be changed */ 961 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 962 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 963 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 964 } 965 PetscFunctionReturn(0); 966 } 967 968 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 969 { 970 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 971 PetscErrorCode ierr; 972 PetscInt nt; 973 VecScatter Mvctx = a->Mvctx; 974 975 PetscFunctionBegin; 976 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 977 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 978 979 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 980 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 981 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 982 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 983 PetscFunctionReturn(0); 984 } 985 986 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 987 { 988 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 989 PetscErrorCode ierr; 990 991 PetscFunctionBegin; 992 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 997 { 998 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 999 PetscErrorCode ierr; 1000 VecScatter Mvctx = a->Mvctx; 1001 1002 PetscFunctionBegin; 1003 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1004 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1012 { 1013 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1014 PetscErrorCode ierr; 1015 PetscBool merged; 1016 1017 PetscFunctionBegin; 1018 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1019 /* do nondiagonal part */ 1020 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1021 if (!merged) { 1022 /* send it on its way */ 1023 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1024 /* do local part */ 1025 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1026 /* receive remote parts: note this assumes the values are not actually */ 1027 /* added in yy until the next line, */ 1028 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1029 } else { 1030 /* do local part */ 1031 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1032 /* send it on its way */ 1033 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1034 /* values actually were received in the Begin() but we need to call this nop */ 1035 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1036 } 1037 PetscFunctionReturn(0); 1038 } 1039 1040 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1041 { 1042 MPI_Comm comm; 1043 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1044 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1045 IS Me,Notme; 1046 PetscErrorCode ierr; 1047 PetscInt M,N,first,last,*notme,i; 1048 PetscMPIInt size; 1049 1050 PetscFunctionBegin; 1051 /* Easy test: symmetric diagonal block */ 1052 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1053 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1054 if (!*f) PetscFunctionReturn(0); 1055 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1056 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1057 if (size == 1) PetscFunctionReturn(0); 1058 1059 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1060 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1061 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1062 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1063 for (i=0; i<first; i++) notme[i] = i; 1064 for (i=last; i<M; i++) notme[i-last+first] = i; 1065 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1066 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1067 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1068 Aoff = Aoffs[0]; 1069 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1070 Boff = Boffs[0]; 1071 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1072 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1073 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1074 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1075 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1076 ierr = PetscFree(notme);CHKERRQ(ierr); 1077 PetscFunctionReturn(0); 1078 } 1079 1080 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1081 { 1082 PetscErrorCode ierr; 1083 1084 PetscFunctionBegin; 1085 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1090 { 1091 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1092 PetscErrorCode ierr; 1093 1094 PetscFunctionBegin; 1095 /* do nondiagonal part */ 1096 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1097 /* send it on its way */ 1098 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1099 /* do local part */ 1100 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1101 /* receive remote parts */ 1102 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 /* 1107 This only works correctly for square matrices where the subblock A->A is the 1108 diagonal block 1109 */ 1110 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1111 { 1112 PetscErrorCode ierr; 1113 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1114 1115 PetscFunctionBegin; 1116 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1117 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1118 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } 1121 1122 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1123 { 1124 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1125 PetscErrorCode ierr; 1126 1127 PetscFunctionBegin; 1128 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1129 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1134 { 1135 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1136 PetscErrorCode ierr; 1137 1138 PetscFunctionBegin; 1139 #if defined(PETSC_USE_LOG) 1140 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1141 #endif 1142 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1143 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1144 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1145 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1146 #if defined(PETSC_USE_CTABLE) 1147 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1148 #else 1149 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1150 #endif 1151 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1152 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1153 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1154 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1155 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1156 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1157 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1158 1159 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1160 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1161 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1162 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1163 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1164 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1165 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1168 #if defined(PETSC_HAVE_ELEMENTAL) 1169 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1170 #endif 1171 #if defined(PETSC_HAVE_HYPRE) 1172 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1174 #endif 1175 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1176 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1181 { 1182 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1183 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1184 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1185 PetscErrorCode ierr; 1186 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1187 int fd; 1188 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1189 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1190 PetscScalar *column_values; 1191 PetscInt message_count,flowcontrolcount; 1192 FILE *file; 1193 1194 PetscFunctionBegin; 1195 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1196 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1197 nz = A->nz + B->nz; 1198 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1199 if (!rank) { 1200 header[0] = MAT_FILE_CLASSID; 1201 header[1] = mat->rmap->N; 1202 header[2] = mat->cmap->N; 1203 1204 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1205 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1206 /* get largest number of rows any processor has */ 1207 rlen = mat->rmap->n; 1208 range = mat->rmap->range; 1209 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1210 } else { 1211 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1212 rlen = mat->rmap->n; 1213 } 1214 1215 /* load up the local row counts */ 1216 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1217 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1218 1219 /* store the row lengths to the file */ 1220 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1221 if (!rank) { 1222 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1223 for (i=1; i<size; i++) { 1224 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1225 rlen = range[i+1] - range[i]; 1226 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1227 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1228 } 1229 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1230 } else { 1231 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1232 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1233 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1234 } 1235 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1236 1237 /* load up the local column indices */ 1238 nzmax = nz; /* th processor needs space a largest processor needs */ 1239 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1240 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1241 cnt = 0; 1242 for (i=0; i<mat->rmap->n; i++) { 1243 for (j=B->i[i]; j<B->i[i+1]; j++) { 1244 if ((col = garray[B->j[j]]) > cstart) break; 1245 column_indices[cnt++] = col; 1246 } 1247 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1248 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1249 } 1250 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1251 1252 /* store the column indices to the file */ 1253 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1254 if (!rank) { 1255 MPI_Status status; 1256 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1257 for (i=1; i<size; i++) { 1258 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1259 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1260 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1261 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1262 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1263 } 1264 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1265 } else { 1266 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1267 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1268 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1269 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1270 } 1271 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1272 1273 /* load up the local column values */ 1274 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1275 cnt = 0; 1276 for (i=0; i<mat->rmap->n; i++) { 1277 for (j=B->i[i]; j<B->i[i+1]; j++) { 1278 if (garray[B->j[j]] > cstart) break; 1279 column_values[cnt++] = B->a[j]; 1280 } 1281 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1282 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1283 } 1284 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1285 1286 /* store the column values to the file */ 1287 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1288 if (!rank) { 1289 MPI_Status status; 1290 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1291 for (i=1; i<size; i++) { 1292 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1293 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1294 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1295 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1296 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1297 } 1298 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1299 } else { 1300 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1301 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1302 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1303 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1304 } 1305 ierr = PetscFree(column_values);CHKERRQ(ierr); 1306 1307 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1308 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1309 PetscFunctionReturn(0); 1310 } 1311 1312 #include <petscdraw.h> 1313 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1314 { 1315 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1316 PetscErrorCode ierr; 1317 PetscMPIInt rank = aij->rank,size = aij->size; 1318 PetscBool isdraw,iascii,isbinary; 1319 PetscViewer sviewer; 1320 PetscViewerFormat format; 1321 1322 PetscFunctionBegin; 1323 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1324 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1325 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1326 if (iascii) { 1327 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1328 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1329 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1330 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1331 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1332 for (i=0; i<(PetscInt)size; i++) { 1333 nmax = PetscMax(nmax,nz[i]); 1334 nmin = PetscMin(nmin,nz[i]); 1335 navg += nz[i]; 1336 } 1337 ierr = PetscFree(nz);CHKERRQ(ierr); 1338 navg = navg/size; 1339 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1343 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1344 MatInfo info; 1345 PetscBool inodes; 1346 1347 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1348 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1349 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1350 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1351 if (!inodes) { 1352 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1353 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1354 } else { 1355 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1356 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1357 } 1358 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1359 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1360 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1361 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1362 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1363 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1364 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1365 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1366 PetscFunctionReturn(0); 1367 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1368 PetscInt inodecount,inodelimit,*inodes; 1369 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1370 if (inodes) { 1371 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1372 } else { 1373 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1374 } 1375 PetscFunctionReturn(0); 1376 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1377 PetscFunctionReturn(0); 1378 } 1379 } else if (isbinary) { 1380 if (size == 1) { 1381 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1382 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1383 } else { 1384 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1385 } 1386 PetscFunctionReturn(0); 1387 } else if (isdraw) { 1388 PetscDraw draw; 1389 PetscBool isnull; 1390 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1391 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1392 if (isnull) PetscFunctionReturn(0); 1393 } 1394 1395 { 1396 /* assemble the entire matrix onto first processor. */ 1397 Mat A; 1398 Mat_SeqAIJ *Aloc; 1399 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1400 MatScalar *a; 1401 1402 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1403 if (!rank) { 1404 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1405 } else { 1406 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1407 } 1408 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1409 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1410 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1411 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1412 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1413 1414 /* copy over the A part */ 1415 Aloc = (Mat_SeqAIJ*)aij->A->data; 1416 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1417 row = mat->rmap->rstart; 1418 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1419 for (i=0; i<m; i++) { 1420 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1421 row++; 1422 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1423 } 1424 aj = Aloc->j; 1425 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1426 1427 /* copy over the B part */ 1428 Aloc = (Mat_SeqAIJ*)aij->B->data; 1429 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1430 row = mat->rmap->rstart; 1431 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1432 ct = cols; 1433 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1434 for (i=0; i<m; i++) { 1435 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1436 row++; 1437 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1438 } 1439 ierr = PetscFree(ct);CHKERRQ(ierr); 1440 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1441 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1442 /* 1443 Everyone has to call to draw the matrix since the graphics waits are 1444 synchronized across all processors that share the PetscDraw object 1445 */ 1446 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1447 if (!rank) { 1448 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1449 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1450 } 1451 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1452 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1453 ierr = MatDestroy(&A);CHKERRQ(ierr); 1454 } 1455 PetscFunctionReturn(0); 1456 } 1457 1458 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1459 { 1460 PetscErrorCode ierr; 1461 PetscBool iascii,isdraw,issocket,isbinary; 1462 1463 PetscFunctionBegin; 1464 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1465 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1466 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1467 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1468 if (iascii || isdraw || isbinary || issocket) { 1469 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1470 } 1471 PetscFunctionReturn(0); 1472 } 1473 1474 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1475 { 1476 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1477 PetscErrorCode ierr; 1478 Vec bb1 = 0; 1479 PetscBool hasop; 1480 1481 PetscFunctionBegin; 1482 if (flag == SOR_APPLY_UPPER) { 1483 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1484 PetscFunctionReturn(0); 1485 } 1486 1487 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1488 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1489 } 1490 1491 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1492 if (flag & SOR_ZERO_INITIAL_GUESS) { 1493 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1494 its--; 1495 } 1496 1497 while (its--) { 1498 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1499 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1500 1501 /* update rhs: bb1 = bb - B*x */ 1502 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1503 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1504 1505 /* local sweep */ 1506 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1507 } 1508 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1509 if (flag & SOR_ZERO_INITIAL_GUESS) { 1510 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1511 its--; 1512 } 1513 while (its--) { 1514 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1515 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1516 1517 /* update rhs: bb1 = bb - B*x */ 1518 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1519 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1520 1521 /* local sweep */ 1522 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1523 } 1524 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1525 if (flag & SOR_ZERO_INITIAL_GUESS) { 1526 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1527 its--; 1528 } 1529 while (its--) { 1530 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1531 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1532 1533 /* update rhs: bb1 = bb - B*x */ 1534 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1535 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1536 1537 /* local sweep */ 1538 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1539 } 1540 } else if (flag & SOR_EISENSTAT) { 1541 Vec xx1; 1542 1543 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1544 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1545 1546 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1547 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1548 if (!mat->diag) { 1549 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1550 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1551 } 1552 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1553 if (hasop) { 1554 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1555 } else { 1556 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1557 } 1558 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1559 1560 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1561 1562 /* local sweep */ 1563 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1564 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1565 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1566 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1567 1568 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1569 1570 matin->factorerrortype = mat->A->factorerrortype; 1571 PetscFunctionReturn(0); 1572 } 1573 1574 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1575 { 1576 Mat aA,aB,Aperm; 1577 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1578 PetscScalar *aa,*ba; 1579 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1580 PetscSF rowsf,sf; 1581 IS parcolp = NULL; 1582 PetscBool done; 1583 PetscErrorCode ierr; 1584 1585 PetscFunctionBegin; 1586 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1587 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1588 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1589 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1590 1591 /* Invert row permutation to find out where my rows should go */ 1592 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1593 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1594 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1595 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1596 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1597 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1598 1599 /* Invert column permutation to find out where my columns should go */ 1600 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1601 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1602 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1603 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1604 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1605 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1606 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1607 1608 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1609 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1610 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1611 1612 /* Find out where my gcols should go */ 1613 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1614 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1615 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1616 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1617 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1618 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1619 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1620 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1621 1622 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1623 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1624 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1625 for (i=0; i<m; i++) { 1626 PetscInt row = rdest[i],rowner; 1627 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1628 for (j=ai[i]; j<ai[i+1]; j++) { 1629 PetscInt cowner,col = cdest[aj[j]]; 1630 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1631 if (rowner == cowner) dnnz[i]++; 1632 else onnz[i]++; 1633 } 1634 for (j=bi[i]; j<bi[i+1]; j++) { 1635 PetscInt cowner,col = gcdest[bj[j]]; 1636 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1637 if (rowner == cowner) dnnz[i]++; 1638 else onnz[i]++; 1639 } 1640 } 1641 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1642 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1643 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1644 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1645 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1646 1647 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1648 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1649 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1650 for (i=0; i<m; i++) { 1651 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1652 PetscInt j0,rowlen; 1653 rowlen = ai[i+1] - ai[i]; 1654 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1655 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1656 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1657 } 1658 rowlen = bi[i+1] - bi[i]; 1659 for (j0=j=0; j<rowlen; j0=j) { 1660 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1661 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1662 } 1663 } 1664 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1665 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1666 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1667 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1668 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1669 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1670 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1671 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1672 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1673 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1674 *B = Aperm; 1675 PetscFunctionReturn(0); 1676 } 1677 1678 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1679 { 1680 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1681 PetscErrorCode ierr; 1682 1683 PetscFunctionBegin; 1684 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1685 if (ghosts) *ghosts = aij->garray; 1686 PetscFunctionReturn(0); 1687 } 1688 1689 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1690 { 1691 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1692 Mat A = mat->A,B = mat->B; 1693 PetscErrorCode ierr; 1694 PetscReal isend[5],irecv[5]; 1695 1696 PetscFunctionBegin; 1697 info->block_size = 1.0; 1698 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1699 1700 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1701 isend[3] = info->memory; isend[4] = info->mallocs; 1702 1703 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1704 1705 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1706 isend[3] += info->memory; isend[4] += info->mallocs; 1707 if (flag == MAT_LOCAL) { 1708 info->nz_used = isend[0]; 1709 info->nz_allocated = isend[1]; 1710 info->nz_unneeded = isend[2]; 1711 info->memory = isend[3]; 1712 info->mallocs = isend[4]; 1713 } else if (flag == MAT_GLOBAL_MAX) { 1714 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1715 1716 info->nz_used = irecv[0]; 1717 info->nz_allocated = irecv[1]; 1718 info->nz_unneeded = irecv[2]; 1719 info->memory = irecv[3]; 1720 info->mallocs = irecv[4]; 1721 } else if (flag == MAT_GLOBAL_SUM) { 1722 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1723 1724 info->nz_used = irecv[0]; 1725 info->nz_allocated = irecv[1]; 1726 info->nz_unneeded = irecv[2]; 1727 info->memory = irecv[3]; 1728 info->mallocs = irecv[4]; 1729 } 1730 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1731 info->fill_ratio_needed = 0; 1732 info->factor_mallocs = 0; 1733 PetscFunctionReturn(0); 1734 } 1735 1736 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1737 { 1738 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1739 PetscErrorCode ierr; 1740 1741 PetscFunctionBegin; 1742 switch (op) { 1743 case MAT_NEW_NONZERO_LOCATIONS: 1744 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1745 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1746 case MAT_KEEP_NONZERO_PATTERN: 1747 case MAT_NEW_NONZERO_LOCATION_ERR: 1748 case MAT_USE_INODES: 1749 case MAT_IGNORE_ZERO_ENTRIES: 1750 MatCheckPreallocated(A,1); 1751 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1752 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1753 break; 1754 case MAT_ROW_ORIENTED: 1755 MatCheckPreallocated(A,1); 1756 a->roworiented = flg; 1757 1758 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1759 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1760 break; 1761 case MAT_NEW_DIAGONALS: 1762 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1763 break; 1764 case MAT_IGNORE_OFF_PROC_ENTRIES: 1765 a->donotstash = flg; 1766 break; 1767 case MAT_SPD: 1768 A->spd_set = PETSC_TRUE; 1769 A->spd = flg; 1770 if (flg) { 1771 A->symmetric = PETSC_TRUE; 1772 A->structurally_symmetric = PETSC_TRUE; 1773 A->symmetric_set = PETSC_TRUE; 1774 A->structurally_symmetric_set = PETSC_TRUE; 1775 } 1776 break; 1777 case MAT_SYMMETRIC: 1778 MatCheckPreallocated(A,1); 1779 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1780 break; 1781 case MAT_STRUCTURALLY_SYMMETRIC: 1782 MatCheckPreallocated(A,1); 1783 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1784 break; 1785 case MAT_HERMITIAN: 1786 MatCheckPreallocated(A,1); 1787 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1788 break; 1789 case MAT_SYMMETRY_ETERNAL: 1790 MatCheckPreallocated(A,1); 1791 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1792 break; 1793 case MAT_SUBMAT_SINGLEIS: 1794 A->submat_singleis = flg; 1795 break; 1796 case MAT_STRUCTURE_ONLY: 1797 /* The option is handled directly by MatSetOption() */ 1798 break; 1799 default: 1800 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1801 } 1802 PetscFunctionReturn(0); 1803 } 1804 1805 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1806 { 1807 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1808 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1809 PetscErrorCode ierr; 1810 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1811 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1812 PetscInt *cmap,*idx_p; 1813 1814 PetscFunctionBegin; 1815 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1816 mat->getrowactive = PETSC_TRUE; 1817 1818 if (!mat->rowvalues && (idx || v)) { 1819 /* 1820 allocate enough space to hold information from the longest row. 1821 */ 1822 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1823 PetscInt max = 1,tmp; 1824 for (i=0; i<matin->rmap->n; i++) { 1825 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1826 if (max < tmp) max = tmp; 1827 } 1828 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1829 } 1830 1831 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1832 lrow = row - rstart; 1833 1834 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1835 if (!v) {pvA = 0; pvB = 0;} 1836 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1837 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1838 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1839 nztot = nzA + nzB; 1840 1841 cmap = mat->garray; 1842 if (v || idx) { 1843 if (nztot) { 1844 /* Sort by increasing column numbers, assuming A and B already sorted */ 1845 PetscInt imark = -1; 1846 if (v) { 1847 *v = v_p = mat->rowvalues; 1848 for (i=0; i<nzB; i++) { 1849 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1850 else break; 1851 } 1852 imark = i; 1853 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1854 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1855 } 1856 if (idx) { 1857 *idx = idx_p = mat->rowindices; 1858 if (imark > -1) { 1859 for (i=0; i<imark; i++) { 1860 idx_p[i] = cmap[cworkB[i]]; 1861 } 1862 } else { 1863 for (i=0; i<nzB; i++) { 1864 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1865 else break; 1866 } 1867 imark = i; 1868 } 1869 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1870 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1871 } 1872 } else { 1873 if (idx) *idx = 0; 1874 if (v) *v = 0; 1875 } 1876 } 1877 *nz = nztot; 1878 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1879 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1880 PetscFunctionReturn(0); 1881 } 1882 1883 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1884 { 1885 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1886 1887 PetscFunctionBegin; 1888 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1889 aij->getrowactive = PETSC_FALSE; 1890 PetscFunctionReturn(0); 1891 } 1892 1893 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1894 { 1895 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1896 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1897 PetscErrorCode ierr; 1898 PetscInt i,j,cstart = mat->cmap->rstart; 1899 PetscReal sum = 0.0; 1900 MatScalar *v; 1901 1902 PetscFunctionBegin; 1903 if (aij->size == 1) { 1904 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1905 } else { 1906 if (type == NORM_FROBENIUS) { 1907 v = amat->a; 1908 for (i=0; i<amat->nz; i++) { 1909 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1910 } 1911 v = bmat->a; 1912 for (i=0; i<bmat->nz; i++) { 1913 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1914 } 1915 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1916 *norm = PetscSqrtReal(*norm); 1917 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1918 } else if (type == NORM_1) { /* max column norm */ 1919 PetscReal *tmp,*tmp2; 1920 PetscInt *jj,*garray = aij->garray; 1921 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1922 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1923 *norm = 0.0; 1924 v = amat->a; jj = amat->j; 1925 for (j=0; j<amat->nz; j++) { 1926 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1927 } 1928 v = bmat->a; jj = bmat->j; 1929 for (j=0; j<bmat->nz; j++) { 1930 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1931 } 1932 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1933 for (j=0; j<mat->cmap->N; j++) { 1934 if (tmp2[j] > *norm) *norm = tmp2[j]; 1935 } 1936 ierr = PetscFree(tmp);CHKERRQ(ierr); 1937 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1938 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1939 } else if (type == NORM_INFINITY) { /* max row norm */ 1940 PetscReal ntemp = 0.0; 1941 for (j=0; j<aij->A->rmap->n; j++) { 1942 v = amat->a + amat->i[j]; 1943 sum = 0.0; 1944 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1945 sum += PetscAbsScalar(*v); v++; 1946 } 1947 v = bmat->a + bmat->i[j]; 1948 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1949 sum += PetscAbsScalar(*v); v++; 1950 } 1951 if (sum > ntemp) ntemp = sum; 1952 } 1953 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1954 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1955 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1956 } 1957 PetscFunctionReturn(0); 1958 } 1959 1960 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1961 { 1962 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1963 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1964 PetscErrorCode ierr; 1965 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1966 PetscInt cstart = A->cmap->rstart,ncol; 1967 Mat B; 1968 MatScalar *array; 1969 1970 PetscFunctionBegin; 1971 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1972 ai = Aloc->i; aj = Aloc->j; 1973 bi = Bloc->i; bj = Bloc->j; 1974 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1975 PetscInt *d_nnz,*g_nnz,*o_nnz; 1976 PetscSFNode *oloc; 1977 PETSC_UNUSED PetscSF sf; 1978 1979 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1980 /* compute d_nnz for preallocation */ 1981 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1982 for (i=0; i<ai[ma]; i++) { 1983 d_nnz[aj[i]]++; 1984 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1985 } 1986 /* compute local off-diagonal contributions */ 1987 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1988 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1989 /* map those to global */ 1990 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1991 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1992 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1993 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1994 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1995 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1996 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1997 1998 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1999 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2000 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2001 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2002 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2003 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2004 } else { 2005 B = *matout; 2006 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2007 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2008 } 2009 2010 /* copy over the A part */ 2011 array = Aloc->a; 2012 row = A->rmap->rstart; 2013 for (i=0; i<ma; i++) { 2014 ncol = ai[i+1]-ai[i]; 2015 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2016 row++; 2017 array += ncol; aj += ncol; 2018 } 2019 aj = Aloc->j; 2020 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2021 2022 /* copy over the B part */ 2023 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2024 array = Bloc->a; 2025 row = A->rmap->rstart; 2026 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2027 cols_tmp = cols; 2028 for (i=0; i<mb; i++) { 2029 ncol = bi[i+1]-bi[i]; 2030 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2031 row++; 2032 array += ncol; cols_tmp += ncol; 2033 } 2034 ierr = PetscFree(cols);CHKERRQ(ierr); 2035 2036 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2037 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2038 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2039 *matout = B; 2040 } else { 2041 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2042 } 2043 PetscFunctionReturn(0); 2044 } 2045 2046 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2047 { 2048 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2049 Mat a = aij->A,b = aij->B; 2050 PetscErrorCode ierr; 2051 PetscInt s1,s2,s3; 2052 2053 PetscFunctionBegin; 2054 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2055 if (rr) { 2056 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2057 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2058 /* Overlap communication with computation. */ 2059 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2060 } 2061 if (ll) { 2062 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2063 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2064 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2065 } 2066 /* scale the diagonal block */ 2067 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2068 2069 if (rr) { 2070 /* Do a scatter end and then right scale the off-diagonal block */ 2071 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2072 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2073 } 2074 PetscFunctionReturn(0); 2075 } 2076 2077 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2078 { 2079 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2080 PetscErrorCode ierr; 2081 2082 PetscFunctionBegin; 2083 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2084 PetscFunctionReturn(0); 2085 } 2086 2087 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2088 { 2089 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2090 Mat a,b,c,d; 2091 PetscBool flg; 2092 PetscErrorCode ierr; 2093 2094 PetscFunctionBegin; 2095 a = matA->A; b = matA->B; 2096 c = matB->A; d = matB->B; 2097 2098 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2099 if (flg) { 2100 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2101 } 2102 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2103 PetscFunctionReturn(0); 2104 } 2105 2106 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2107 { 2108 PetscErrorCode ierr; 2109 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2110 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2111 2112 PetscFunctionBegin; 2113 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2114 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2115 /* because of the column compression in the off-processor part of the matrix a->B, 2116 the number of columns in a->B and b->B may be different, hence we cannot call 2117 the MatCopy() directly on the two parts. If need be, we can provide a more 2118 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2119 then copying the submatrices */ 2120 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2121 } else { 2122 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2123 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2124 } 2125 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2126 PetscFunctionReturn(0); 2127 } 2128 2129 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2130 { 2131 PetscErrorCode ierr; 2132 2133 PetscFunctionBegin; 2134 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2135 PetscFunctionReturn(0); 2136 } 2137 2138 /* 2139 Computes the number of nonzeros per row needed for preallocation when X and Y 2140 have different nonzero structure. 2141 */ 2142 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2143 { 2144 PetscInt i,j,k,nzx,nzy; 2145 2146 PetscFunctionBegin; 2147 /* Set the number of nonzeros in the new matrix */ 2148 for (i=0; i<m; i++) { 2149 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2150 nzx = xi[i+1] - xi[i]; 2151 nzy = yi[i+1] - yi[i]; 2152 nnz[i] = 0; 2153 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2154 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2155 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2156 nnz[i]++; 2157 } 2158 for (; k<nzy; k++) nnz[i]++; 2159 } 2160 PetscFunctionReturn(0); 2161 } 2162 2163 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2164 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2165 { 2166 PetscErrorCode ierr; 2167 PetscInt m = Y->rmap->N; 2168 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2169 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2170 2171 PetscFunctionBegin; 2172 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2173 PetscFunctionReturn(0); 2174 } 2175 2176 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2177 { 2178 PetscErrorCode ierr; 2179 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2180 PetscBLASInt bnz,one=1; 2181 Mat_SeqAIJ *x,*y; 2182 2183 PetscFunctionBegin; 2184 if (str == SAME_NONZERO_PATTERN) { 2185 PetscScalar alpha = a; 2186 x = (Mat_SeqAIJ*)xx->A->data; 2187 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2188 y = (Mat_SeqAIJ*)yy->A->data; 2189 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2190 x = (Mat_SeqAIJ*)xx->B->data; 2191 y = (Mat_SeqAIJ*)yy->B->data; 2192 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2193 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2194 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2195 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2196 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2197 } else { 2198 Mat B; 2199 PetscInt *nnz_d,*nnz_o; 2200 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2201 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2202 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2203 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2204 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2205 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2206 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2207 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2208 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2209 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2210 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2211 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2212 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2213 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2214 } 2215 PetscFunctionReturn(0); 2216 } 2217 2218 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2219 2220 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2221 { 2222 #if defined(PETSC_USE_COMPLEX) 2223 PetscErrorCode ierr; 2224 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2225 2226 PetscFunctionBegin; 2227 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2228 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2229 #else 2230 PetscFunctionBegin; 2231 #endif 2232 PetscFunctionReturn(0); 2233 } 2234 2235 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2236 { 2237 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2238 PetscErrorCode ierr; 2239 2240 PetscFunctionBegin; 2241 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2242 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2243 PetscFunctionReturn(0); 2244 } 2245 2246 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2247 { 2248 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2249 PetscErrorCode ierr; 2250 2251 PetscFunctionBegin; 2252 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2253 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2254 PetscFunctionReturn(0); 2255 } 2256 2257 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2258 { 2259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2260 PetscErrorCode ierr; 2261 PetscInt i,*idxb = 0; 2262 PetscScalar *va,*vb; 2263 Vec vtmp; 2264 2265 PetscFunctionBegin; 2266 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2267 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2268 if (idx) { 2269 for (i=0; i<A->rmap->n; i++) { 2270 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2271 } 2272 } 2273 2274 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2275 if (idx) { 2276 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2277 } 2278 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2279 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2280 2281 for (i=0; i<A->rmap->n; i++) { 2282 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2283 va[i] = vb[i]; 2284 if (idx) idx[i] = a->garray[idxb[i]]; 2285 } 2286 } 2287 2288 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2289 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2290 ierr = PetscFree(idxb);CHKERRQ(ierr); 2291 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2292 PetscFunctionReturn(0); 2293 } 2294 2295 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2296 { 2297 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2298 PetscErrorCode ierr; 2299 PetscInt i,*idxb = 0; 2300 PetscScalar *va,*vb; 2301 Vec vtmp; 2302 2303 PetscFunctionBegin; 2304 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2305 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2306 if (idx) { 2307 for (i=0; i<A->cmap->n; i++) { 2308 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2309 } 2310 } 2311 2312 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2313 if (idx) { 2314 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2315 } 2316 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2317 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2318 2319 for (i=0; i<A->rmap->n; i++) { 2320 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2321 va[i] = vb[i]; 2322 if (idx) idx[i] = a->garray[idxb[i]]; 2323 } 2324 } 2325 2326 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2327 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2328 ierr = PetscFree(idxb);CHKERRQ(ierr); 2329 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2330 PetscFunctionReturn(0); 2331 } 2332 2333 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2334 { 2335 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2336 PetscInt n = A->rmap->n; 2337 PetscInt cstart = A->cmap->rstart; 2338 PetscInt *cmap = mat->garray; 2339 PetscInt *diagIdx, *offdiagIdx; 2340 Vec diagV, offdiagV; 2341 PetscScalar *a, *diagA, *offdiagA; 2342 PetscInt r; 2343 PetscErrorCode ierr; 2344 2345 PetscFunctionBegin; 2346 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2347 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2348 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2349 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2350 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2351 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2352 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2353 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2354 for (r = 0; r < n; ++r) { 2355 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2356 a[r] = diagA[r]; 2357 idx[r] = cstart + diagIdx[r]; 2358 } else { 2359 a[r] = offdiagA[r]; 2360 idx[r] = cmap[offdiagIdx[r]]; 2361 } 2362 } 2363 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2364 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2365 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2366 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2367 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2368 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2369 PetscFunctionReturn(0); 2370 } 2371 2372 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2373 { 2374 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2375 PetscInt n = A->rmap->n; 2376 PetscInt cstart = A->cmap->rstart; 2377 PetscInt *cmap = mat->garray; 2378 PetscInt *diagIdx, *offdiagIdx; 2379 Vec diagV, offdiagV; 2380 PetscScalar *a, *diagA, *offdiagA; 2381 PetscInt r; 2382 PetscErrorCode ierr; 2383 2384 PetscFunctionBegin; 2385 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2386 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2387 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2388 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2389 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2390 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2391 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2392 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2393 for (r = 0; r < n; ++r) { 2394 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2395 a[r] = diagA[r]; 2396 idx[r] = cstart + diagIdx[r]; 2397 } else { 2398 a[r] = offdiagA[r]; 2399 idx[r] = cmap[offdiagIdx[r]]; 2400 } 2401 } 2402 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2403 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2404 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2405 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2406 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2407 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2408 PetscFunctionReturn(0); 2409 } 2410 2411 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2412 { 2413 PetscErrorCode ierr; 2414 Mat *dummy; 2415 2416 PetscFunctionBegin; 2417 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2418 *newmat = *dummy; 2419 ierr = PetscFree(dummy);CHKERRQ(ierr); 2420 PetscFunctionReturn(0); 2421 } 2422 2423 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2424 { 2425 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2426 PetscErrorCode ierr; 2427 2428 PetscFunctionBegin; 2429 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2430 A->factorerrortype = a->A->factorerrortype; 2431 PetscFunctionReturn(0); 2432 } 2433 2434 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2435 { 2436 PetscErrorCode ierr; 2437 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2438 2439 PetscFunctionBegin; 2440 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2441 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2442 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2443 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2444 PetscFunctionReturn(0); 2445 } 2446 2447 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2448 { 2449 PetscFunctionBegin; 2450 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2451 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2452 PetscFunctionReturn(0); 2453 } 2454 2455 /*@ 2456 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2457 2458 Collective on Mat 2459 2460 Input Parameters: 2461 + A - the matrix 2462 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2463 2464 Level: advanced 2465 2466 @*/ 2467 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2468 { 2469 PetscErrorCode ierr; 2470 2471 PetscFunctionBegin; 2472 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2473 PetscFunctionReturn(0); 2474 } 2475 2476 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2477 { 2478 PetscErrorCode ierr; 2479 PetscBool sc = PETSC_FALSE,flg; 2480 2481 PetscFunctionBegin; 2482 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2483 ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr); 2484 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2485 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2486 if (flg) { 2487 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2488 } 2489 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2490 PetscFunctionReturn(0); 2491 } 2492 2493 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2494 { 2495 PetscErrorCode ierr; 2496 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2497 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2498 2499 PetscFunctionBegin; 2500 if (!Y->preallocated) { 2501 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2502 } else if (!aij->nz) { 2503 PetscInt nonew = aij->nonew; 2504 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2505 aij->nonew = nonew; 2506 } 2507 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2508 PetscFunctionReturn(0); 2509 } 2510 2511 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2512 { 2513 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2514 PetscErrorCode ierr; 2515 2516 PetscFunctionBegin; 2517 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2518 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2519 if (d) { 2520 PetscInt rstart; 2521 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2522 *d += rstart; 2523 2524 } 2525 PetscFunctionReturn(0); 2526 } 2527 2528 2529 /* -------------------------------------------------------------------*/ 2530 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2531 MatGetRow_MPIAIJ, 2532 MatRestoreRow_MPIAIJ, 2533 MatMult_MPIAIJ, 2534 /* 4*/ MatMultAdd_MPIAIJ, 2535 MatMultTranspose_MPIAIJ, 2536 MatMultTransposeAdd_MPIAIJ, 2537 0, 2538 0, 2539 0, 2540 /*10*/ 0, 2541 0, 2542 0, 2543 MatSOR_MPIAIJ, 2544 MatTranspose_MPIAIJ, 2545 /*15*/ MatGetInfo_MPIAIJ, 2546 MatEqual_MPIAIJ, 2547 MatGetDiagonal_MPIAIJ, 2548 MatDiagonalScale_MPIAIJ, 2549 MatNorm_MPIAIJ, 2550 /*20*/ MatAssemblyBegin_MPIAIJ, 2551 MatAssemblyEnd_MPIAIJ, 2552 MatSetOption_MPIAIJ, 2553 MatZeroEntries_MPIAIJ, 2554 /*24*/ MatZeroRows_MPIAIJ, 2555 0, 2556 0, 2557 0, 2558 0, 2559 /*29*/ MatSetUp_MPIAIJ, 2560 0, 2561 0, 2562 MatGetDiagonalBlock_MPIAIJ, 2563 0, 2564 /*34*/ MatDuplicate_MPIAIJ, 2565 0, 2566 0, 2567 0, 2568 0, 2569 /*39*/ MatAXPY_MPIAIJ, 2570 MatCreateSubMatrices_MPIAIJ, 2571 MatIncreaseOverlap_MPIAIJ, 2572 MatGetValues_MPIAIJ, 2573 MatCopy_MPIAIJ, 2574 /*44*/ MatGetRowMax_MPIAIJ, 2575 MatScale_MPIAIJ, 2576 MatShift_MPIAIJ, 2577 MatDiagonalSet_MPIAIJ, 2578 MatZeroRowsColumns_MPIAIJ, 2579 /*49*/ MatSetRandom_MPIAIJ, 2580 0, 2581 0, 2582 0, 2583 0, 2584 /*54*/ MatFDColoringCreate_MPIXAIJ, 2585 0, 2586 MatSetUnfactored_MPIAIJ, 2587 MatPermute_MPIAIJ, 2588 0, 2589 /*59*/ MatCreateSubMatrix_MPIAIJ, 2590 MatDestroy_MPIAIJ, 2591 MatView_MPIAIJ, 2592 0, 2593 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2594 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2595 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2596 0, 2597 0, 2598 0, 2599 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2600 MatGetRowMinAbs_MPIAIJ, 2601 0, 2602 0, 2603 0, 2604 0, 2605 /*75*/ MatFDColoringApply_AIJ, 2606 MatSetFromOptions_MPIAIJ, 2607 0, 2608 0, 2609 MatFindZeroDiagonals_MPIAIJ, 2610 /*80*/ 0, 2611 0, 2612 0, 2613 /*83*/ MatLoad_MPIAIJ, 2614 MatIsSymmetric_MPIAIJ, 2615 0, 2616 0, 2617 0, 2618 0, 2619 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2620 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2621 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2622 MatPtAP_MPIAIJ_MPIAIJ, 2623 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2624 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2625 0, 2626 0, 2627 0, 2628 0, 2629 /*99*/ 0, 2630 0, 2631 0, 2632 MatConjugate_MPIAIJ, 2633 0, 2634 /*104*/MatSetValuesRow_MPIAIJ, 2635 MatRealPart_MPIAIJ, 2636 MatImaginaryPart_MPIAIJ, 2637 0, 2638 0, 2639 /*109*/0, 2640 0, 2641 MatGetRowMin_MPIAIJ, 2642 0, 2643 MatMissingDiagonal_MPIAIJ, 2644 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2645 0, 2646 MatGetGhosts_MPIAIJ, 2647 0, 2648 0, 2649 /*119*/0, 2650 0, 2651 0, 2652 0, 2653 MatGetMultiProcBlock_MPIAIJ, 2654 /*124*/MatFindNonzeroRows_MPIAIJ, 2655 MatGetColumnNorms_MPIAIJ, 2656 MatInvertBlockDiagonal_MPIAIJ, 2657 0, 2658 MatCreateSubMatricesMPI_MPIAIJ, 2659 /*129*/0, 2660 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2661 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2662 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2663 0, 2664 /*134*/0, 2665 0, 2666 MatRARt_MPIAIJ_MPIAIJ, 2667 0, 2668 0, 2669 /*139*/MatSetBlockSizes_MPIAIJ, 2670 0, 2671 0, 2672 MatFDColoringSetUp_MPIXAIJ, 2673 MatFindOffBlockDiagonalEntries_MPIAIJ, 2674 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2675 }; 2676 2677 /* ----------------------------------------------------------------------------------------*/ 2678 2679 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2680 { 2681 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2682 PetscErrorCode ierr; 2683 2684 PetscFunctionBegin; 2685 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2686 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2687 PetscFunctionReturn(0); 2688 } 2689 2690 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2691 { 2692 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2693 PetscErrorCode ierr; 2694 2695 PetscFunctionBegin; 2696 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2697 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2698 PetscFunctionReturn(0); 2699 } 2700 2701 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2702 { 2703 Mat_MPIAIJ *b; 2704 PetscErrorCode ierr; 2705 2706 PetscFunctionBegin; 2707 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2708 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2709 b = (Mat_MPIAIJ*)B->data; 2710 2711 #if defined(PETSC_USE_CTABLE) 2712 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2713 #else 2714 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2715 #endif 2716 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2717 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2718 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2719 2720 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2721 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2722 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2723 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2724 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2725 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2726 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2727 2728 if (!B->preallocated) { 2729 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2730 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2731 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2732 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2733 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2734 } 2735 2736 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2737 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2738 B->preallocated = PETSC_TRUE; 2739 B->was_assembled = PETSC_FALSE; 2740 B->assembled = PETSC_FALSE;; 2741 PetscFunctionReturn(0); 2742 } 2743 2744 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2745 { 2746 Mat_MPIAIJ *b; 2747 PetscErrorCode ierr; 2748 2749 PetscFunctionBegin; 2750 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2751 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2752 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2753 b = (Mat_MPIAIJ*)B->data; 2754 2755 #if defined(PETSC_USE_CTABLE) 2756 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2757 #else 2758 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2759 #endif 2760 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2761 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2762 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2763 2764 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2765 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2766 B->preallocated = PETSC_TRUE; 2767 B->was_assembled = PETSC_FALSE; 2768 B->assembled = PETSC_FALSE; 2769 PetscFunctionReturn(0); 2770 } 2771 2772 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2773 { 2774 Mat mat; 2775 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2776 PetscErrorCode ierr; 2777 2778 PetscFunctionBegin; 2779 *newmat = 0; 2780 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2781 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2782 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2783 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2784 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2785 a = (Mat_MPIAIJ*)mat->data; 2786 2787 mat->factortype = matin->factortype; 2788 mat->assembled = PETSC_TRUE; 2789 mat->insertmode = NOT_SET_VALUES; 2790 mat->preallocated = PETSC_TRUE; 2791 2792 a->size = oldmat->size; 2793 a->rank = oldmat->rank; 2794 a->donotstash = oldmat->donotstash; 2795 a->roworiented = oldmat->roworiented; 2796 a->rowindices = 0; 2797 a->rowvalues = 0; 2798 a->getrowactive = PETSC_FALSE; 2799 2800 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2801 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2802 2803 if (oldmat->colmap) { 2804 #if defined(PETSC_USE_CTABLE) 2805 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2806 #else 2807 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2808 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2809 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2810 #endif 2811 } else a->colmap = 0; 2812 if (oldmat->garray) { 2813 PetscInt len; 2814 len = oldmat->B->cmap->n; 2815 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2816 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2817 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2818 } else a->garray = 0; 2819 2820 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2821 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2822 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2823 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2824 2825 if (oldmat->Mvctx_mpi1) { 2826 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2827 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2828 } 2829 2830 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2831 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2832 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2833 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2834 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2835 *newmat = mat; 2836 PetscFunctionReturn(0); 2837 } 2838 2839 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2840 { 2841 PetscScalar *vals,*svals; 2842 MPI_Comm comm; 2843 PetscErrorCode ierr; 2844 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2845 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2846 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2847 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2848 PetscInt cend,cstart,n,*rowners; 2849 int fd; 2850 PetscInt bs = newMat->rmap->bs; 2851 2852 PetscFunctionBegin; 2853 /* force binary viewer to load .info file if it has not yet done so */ 2854 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2855 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2856 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2857 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2858 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2859 if (!rank) { 2860 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2861 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2862 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2863 } 2864 2865 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2866 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2867 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2868 if (bs < 0) bs = 1; 2869 2870 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2871 M = header[1]; N = header[2]; 2872 2873 /* If global sizes are set, check if they are consistent with that given in the file */ 2874 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2875 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2876 2877 /* determine ownership of all (block) rows */ 2878 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2879 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2880 else m = newMat->rmap->n; /* Set by user */ 2881 2882 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2883 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2884 2885 /* First process needs enough room for process with most rows */ 2886 if (!rank) { 2887 mmax = rowners[1]; 2888 for (i=2; i<=size; i++) { 2889 mmax = PetscMax(mmax, rowners[i]); 2890 } 2891 } else mmax = -1; /* unused, but compilers complain */ 2892 2893 rowners[0] = 0; 2894 for (i=2; i<=size; i++) { 2895 rowners[i] += rowners[i-1]; 2896 } 2897 rstart = rowners[rank]; 2898 rend = rowners[rank+1]; 2899 2900 /* distribute row lengths to all processors */ 2901 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2902 if (!rank) { 2903 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2904 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2905 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2906 for (j=0; j<m; j++) { 2907 procsnz[0] += ourlens[j]; 2908 } 2909 for (i=1; i<size; i++) { 2910 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2911 /* calculate the number of nonzeros on each processor */ 2912 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2913 procsnz[i] += rowlengths[j]; 2914 } 2915 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2916 } 2917 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2918 } else { 2919 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2920 } 2921 2922 if (!rank) { 2923 /* determine max buffer needed and allocate it */ 2924 maxnz = 0; 2925 for (i=0; i<size; i++) { 2926 maxnz = PetscMax(maxnz,procsnz[i]); 2927 } 2928 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2929 2930 /* read in my part of the matrix column indices */ 2931 nz = procsnz[0]; 2932 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2933 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2934 2935 /* read in every one elses and ship off */ 2936 for (i=1; i<size; i++) { 2937 nz = procsnz[i]; 2938 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2939 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2940 } 2941 ierr = PetscFree(cols);CHKERRQ(ierr); 2942 } else { 2943 /* determine buffer space needed for message */ 2944 nz = 0; 2945 for (i=0; i<m; i++) { 2946 nz += ourlens[i]; 2947 } 2948 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2949 2950 /* receive message of column indices*/ 2951 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2952 } 2953 2954 /* determine column ownership if matrix is not square */ 2955 if (N != M) { 2956 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2957 else n = newMat->cmap->n; 2958 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2959 cstart = cend - n; 2960 } else { 2961 cstart = rstart; 2962 cend = rend; 2963 n = cend - cstart; 2964 } 2965 2966 /* loop over local rows, determining number of off diagonal entries */ 2967 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2968 jj = 0; 2969 for (i=0; i<m; i++) { 2970 for (j=0; j<ourlens[i]; j++) { 2971 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2972 jj++; 2973 } 2974 } 2975 2976 for (i=0; i<m; i++) { 2977 ourlens[i] -= offlens[i]; 2978 } 2979 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2980 2981 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2982 2983 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2984 2985 for (i=0; i<m; i++) { 2986 ourlens[i] += offlens[i]; 2987 } 2988 2989 if (!rank) { 2990 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2991 2992 /* read in my part of the matrix numerical values */ 2993 nz = procsnz[0]; 2994 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2995 2996 /* insert into matrix */ 2997 jj = rstart; 2998 smycols = mycols; 2999 svals = vals; 3000 for (i=0; i<m; i++) { 3001 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3002 smycols += ourlens[i]; 3003 svals += ourlens[i]; 3004 jj++; 3005 } 3006 3007 /* read in other processors and ship out */ 3008 for (i=1; i<size; i++) { 3009 nz = procsnz[i]; 3010 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3011 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3012 } 3013 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3014 } else { 3015 /* receive numeric values */ 3016 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3017 3018 /* receive message of values*/ 3019 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3020 3021 /* insert into matrix */ 3022 jj = rstart; 3023 smycols = mycols; 3024 svals = vals; 3025 for (i=0; i<m; i++) { 3026 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3027 smycols += ourlens[i]; 3028 svals += ourlens[i]; 3029 jj++; 3030 } 3031 } 3032 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3033 ierr = PetscFree(vals);CHKERRQ(ierr); 3034 ierr = PetscFree(mycols);CHKERRQ(ierr); 3035 ierr = PetscFree(rowners);CHKERRQ(ierr); 3036 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3037 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3038 PetscFunctionReturn(0); 3039 } 3040 3041 /* Not scalable because of ISAllGather() unless getting all columns. */ 3042 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3043 { 3044 PetscErrorCode ierr; 3045 IS iscol_local; 3046 PetscBool isstride; 3047 PetscMPIInt lisstride=0,gisstride; 3048 3049 PetscFunctionBegin; 3050 /* check if we are grabbing all columns*/ 3051 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3052 3053 if (isstride) { 3054 PetscInt start,len,mstart,mlen; 3055 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3056 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3057 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3058 if (mstart == start && mlen-mstart == len) lisstride = 1; 3059 } 3060 3061 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3062 if (gisstride) { 3063 PetscInt N; 3064 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3065 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3066 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3067 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3068 } else { 3069 PetscInt cbs; 3070 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3071 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3072 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3073 } 3074 3075 *isseq = iscol_local; 3076 PetscFunctionReturn(0); 3077 } 3078 3079 /* 3080 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3081 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3082 3083 Input Parameters: 3084 mat - matrix 3085 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3086 i.e., mat->rstart <= isrow[i] < mat->rend 3087 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3088 i.e., mat->cstart <= iscol[i] < mat->cend 3089 Output Parameter: 3090 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3091 iscol_o - sequential column index set for retrieving mat->B 3092 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3093 */ 3094 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3095 { 3096 PetscErrorCode ierr; 3097 Vec x,cmap; 3098 const PetscInt *is_idx; 3099 PetscScalar *xarray,*cmaparray; 3100 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3101 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3102 Mat B=a->B; 3103 Vec lvec=a->lvec,lcmap; 3104 PetscInt i,cstart,cend,Bn=B->cmap->N; 3105 MPI_Comm comm; 3106 VecScatter Mvctx=a->Mvctx; 3107 3108 PetscFunctionBegin; 3109 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3110 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3111 3112 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3113 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3114 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3115 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3116 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3117 3118 /* Get start indices */ 3119 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3120 isstart -= ncols; 3121 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3122 3123 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3124 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3125 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3126 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3127 for (i=0; i<ncols; i++) { 3128 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3129 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3130 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3131 } 3132 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3133 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3134 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3135 3136 /* Get iscol_d */ 3137 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3138 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3139 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3140 3141 /* Get isrow_d */ 3142 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3143 rstart = mat->rmap->rstart; 3144 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3145 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3146 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3147 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3148 3149 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3150 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3151 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3152 3153 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3154 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3155 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3156 3157 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3158 3159 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3160 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3161 3162 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3163 /* off-process column indices */ 3164 count = 0; 3165 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3166 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3167 3168 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3169 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3170 for (i=0; i<Bn; i++) { 3171 if (PetscRealPart(xarray[i]) > -1.0) { 3172 idx[count] = i; /* local column index in off-diagonal part B */ 3173 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3174 count++; 3175 } 3176 } 3177 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3178 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3179 3180 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3181 /* cannot ensure iscol_o has same blocksize as iscol! */ 3182 3183 ierr = PetscFree(idx);CHKERRQ(ierr); 3184 *garray = cmap1; 3185 3186 ierr = VecDestroy(&x);CHKERRQ(ierr); 3187 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3188 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3189 PetscFunctionReturn(0); 3190 } 3191 3192 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3193 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3194 { 3195 PetscErrorCode ierr; 3196 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3197 Mat M = NULL; 3198 MPI_Comm comm; 3199 IS iscol_d,isrow_d,iscol_o; 3200 Mat Asub = NULL,Bsub = NULL; 3201 PetscInt n; 3202 3203 PetscFunctionBegin; 3204 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3205 3206 if (call == MAT_REUSE_MATRIX) { 3207 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3208 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3209 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3210 3211 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3212 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3213 3214 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3215 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3216 3217 /* Update diagonal and off-diagonal portions of submat */ 3218 asub = (Mat_MPIAIJ*)(*submat)->data; 3219 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3220 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3221 if (n) { 3222 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3223 } 3224 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3225 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3226 3227 } else { /* call == MAT_INITIAL_MATRIX) */ 3228 const PetscInt *garray; 3229 PetscInt BsubN; 3230 3231 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3232 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3233 3234 /* Create local submatrices Asub and Bsub */ 3235 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3236 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3237 3238 /* Create submatrix M */ 3239 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3240 3241 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3242 asub = (Mat_MPIAIJ*)M->data; 3243 3244 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3245 n = asub->B->cmap->N; 3246 if (BsubN > n) { 3247 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3248 const PetscInt *idx; 3249 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3250 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3251 3252 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3253 j = 0; 3254 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3255 for (i=0; i<n; i++) { 3256 if (j >= BsubN) break; 3257 while (subgarray[i] > garray[j]) j++; 3258 3259 if (subgarray[i] == garray[j]) { 3260 idx_new[i] = idx[j++]; 3261 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3262 } 3263 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3264 3265 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3266 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3267 3268 } else if (BsubN < n) { 3269 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3270 } 3271 3272 ierr = PetscFree(garray);CHKERRQ(ierr); 3273 *submat = M; 3274 3275 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3276 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3277 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3278 3279 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3280 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3281 3282 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3283 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3284 } 3285 PetscFunctionReturn(0); 3286 } 3287 3288 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3289 { 3290 PetscErrorCode ierr; 3291 IS iscol_local=NULL,isrow_d; 3292 PetscInt csize; 3293 PetscInt n,i,j,start,end; 3294 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3295 MPI_Comm comm; 3296 3297 PetscFunctionBegin; 3298 /* If isrow has same processor distribution as mat, 3299 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3300 if (call == MAT_REUSE_MATRIX) { 3301 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3302 if (isrow_d) { 3303 sameRowDist = PETSC_TRUE; 3304 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3305 } else { 3306 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3307 if (iscol_local) { 3308 sameRowDist = PETSC_TRUE; 3309 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3310 } 3311 } 3312 } else { 3313 /* Check if isrow has same processor distribution as mat */ 3314 sameDist[0] = PETSC_FALSE; 3315 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3316 if (!n) { 3317 sameDist[0] = PETSC_TRUE; 3318 } else { 3319 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3320 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3321 if (i >= start && j < end) { 3322 sameDist[0] = PETSC_TRUE; 3323 } 3324 } 3325 3326 /* Check if iscol has same processor distribution as mat */ 3327 sameDist[1] = PETSC_FALSE; 3328 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3329 if (!n) { 3330 sameDist[1] = PETSC_TRUE; 3331 } else { 3332 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3333 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3334 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3335 } 3336 3337 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3338 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3339 sameRowDist = tsameDist[0]; 3340 } 3341 3342 if (sameRowDist) { 3343 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3344 /* isrow and iscol have same processor distribution as mat */ 3345 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3346 PetscFunctionReturn(0); 3347 } else { /* sameRowDist */ 3348 /* isrow has same processor distribution as mat */ 3349 if (call == MAT_INITIAL_MATRIX) { 3350 PetscBool sorted; 3351 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3352 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3353 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3354 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3355 3356 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3357 if (sorted) { 3358 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3359 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3360 PetscFunctionReturn(0); 3361 } 3362 } else { /* call == MAT_REUSE_MATRIX */ 3363 IS iscol_sub; 3364 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3365 if (iscol_sub) { 3366 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3367 PetscFunctionReturn(0); 3368 } 3369 } 3370 } 3371 } 3372 3373 /* General case: iscol -> iscol_local which has global size of iscol */ 3374 if (call == MAT_REUSE_MATRIX) { 3375 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3376 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3377 } else { 3378 if (!iscol_local) { 3379 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3380 } 3381 } 3382 3383 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3384 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3385 3386 if (call == MAT_INITIAL_MATRIX) { 3387 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3388 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3389 } 3390 PetscFunctionReturn(0); 3391 } 3392 3393 /*@C 3394 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3395 and "off-diagonal" part of the matrix in CSR format. 3396 3397 Collective on MPI_Comm 3398 3399 Input Parameters: 3400 + comm - MPI communicator 3401 . A - "diagonal" portion of matrix 3402 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3403 - garray - global index of B columns 3404 3405 Output Parameter: 3406 . mat - the matrix, with input A as its local diagonal matrix 3407 Level: advanced 3408 3409 Notes: 3410 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3411 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3412 3413 .seealso: MatCreateMPIAIJWithSplitArrays() 3414 @*/ 3415 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3416 { 3417 PetscErrorCode ierr; 3418 Mat_MPIAIJ *maij; 3419 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3420 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3421 PetscScalar *oa=b->a; 3422 Mat Bnew; 3423 PetscInt m,n,N; 3424 3425 PetscFunctionBegin; 3426 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3427 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3428 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3429 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3430 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3431 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3432 3433 /* Get global columns of mat */ 3434 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3435 3436 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3437 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3438 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3439 maij = (Mat_MPIAIJ*)(*mat)->data; 3440 3441 (*mat)->preallocated = PETSC_TRUE; 3442 3443 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3444 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3445 3446 /* Set A as diagonal portion of *mat */ 3447 maij->A = A; 3448 3449 nz = oi[m]; 3450 for (i=0; i<nz; i++) { 3451 col = oj[i]; 3452 oj[i] = garray[col]; 3453 } 3454 3455 /* Set Bnew as off-diagonal portion of *mat */ 3456 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3457 bnew = (Mat_SeqAIJ*)Bnew->data; 3458 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3459 maij->B = Bnew; 3460 3461 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3462 3463 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3464 b->free_a = PETSC_FALSE; 3465 b->free_ij = PETSC_FALSE; 3466 ierr = MatDestroy(&B);CHKERRQ(ierr); 3467 3468 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3469 bnew->free_a = PETSC_TRUE; 3470 bnew->free_ij = PETSC_TRUE; 3471 3472 /* condense columns of maij->B */ 3473 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3474 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3475 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3476 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3477 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3478 PetscFunctionReturn(0); 3479 } 3480 3481 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3482 3483 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3484 { 3485 PetscErrorCode ierr; 3486 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3487 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3488 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3489 Mat M,Msub,B=a->B; 3490 MatScalar *aa; 3491 Mat_SeqAIJ *aij; 3492 PetscInt *garray = a->garray,*colsub,Ncols; 3493 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3494 IS iscol_sub,iscmap; 3495 const PetscInt *is_idx,*cmap; 3496 PetscBool allcolumns=PETSC_FALSE; 3497 MPI_Comm comm; 3498 3499 PetscFunctionBegin; 3500 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3501 3502 if (call == MAT_REUSE_MATRIX) { 3503 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3504 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3505 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3506 3507 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3508 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3509 3510 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3511 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3512 3513 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3514 3515 } else { /* call == MAT_INITIAL_MATRIX) */ 3516 PetscBool flg; 3517 3518 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3519 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3520 3521 /* (1) iscol -> nonscalable iscol_local */ 3522 /* Check for special case: each processor gets entire matrix columns */ 3523 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3524 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3525 if (allcolumns) { 3526 iscol_sub = iscol_local; 3527 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3528 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3529 3530 } else { 3531 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3532 PetscInt *idx,*cmap1,k; 3533 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3534 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3535 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3536 count = 0; 3537 k = 0; 3538 for (i=0; i<Ncols; i++) { 3539 j = is_idx[i]; 3540 if (j >= cstart && j < cend) { 3541 /* diagonal part of mat */ 3542 idx[count] = j; 3543 cmap1[count++] = i; /* column index in submat */ 3544 } else if (Bn) { 3545 /* off-diagonal part of mat */ 3546 if (j == garray[k]) { 3547 idx[count] = j; 3548 cmap1[count++] = i; /* column index in submat */ 3549 } else if (j > garray[k]) { 3550 while (j > garray[k] && k < Bn-1) k++; 3551 if (j == garray[k]) { 3552 idx[count] = j; 3553 cmap1[count++] = i; /* column index in submat */ 3554 } 3555 } 3556 } 3557 } 3558 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3559 3560 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3561 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3562 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3563 3564 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3565 } 3566 3567 /* (3) Create sequential Msub */ 3568 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3569 } 3570 3571 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3572 aij = (Mat_SeqAIJ*)(Msub)->data; 3573 ii = aij->i; 3574 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3575 3576 /* 3577 m - number of local rows 3578 Ncols - number of columns (same on all processors) 3579 rstart - first row in new global matrix generated 3580 */ 3581 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3582 3583 if (call == MAT_INITIAL_MATRIX) { 3584 /* (4) Create parallel newmat */ 3585 PetscMPIInt rank,size; 3586 PetscInt csize; 3587 3588 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3589 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3590 3591 /* 3592 Determine the number of non-zeros in the diagonal and off-diagonal 3593 portions of the matrix in order to do correct preallocation 3594 */ 3595 3596 /* first get start and end of "diagonal" columns */ 3597 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3598 if (csize == PETSC_DECIDE) { 3599 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3600 if (mglobal == Ncols) { /* square matrix */ 3601 nlocal = m; 3602 } else { 3603 nlocal = Ncols/size + ((Ncols % size) > rank); 3604 } 3605 } else { 3606 nlocal = csize; 3607 } 3608 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3609 rstart = rend - nlocal; 3610 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3611 3612 /* next, compute all the lengths */ 3613 jj = aij->j; 3614 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3615 olens = dlens + m; 3616 for (i=0; i<m; i++) { 3617 jend = ii[i+1] - ii[i]; 3618 olen = 0; 3619 dlen = 0; 3620 for (j=0; j<jend; j++) { 3621 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3622 else dlen++; 3623 jj++; 3624 } 3625 olens[i] = olen; 3626 dlens[i] = dlen; 3627 } 3628 3629 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3630 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3631 3632 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3633 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3634 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3635 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3636 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3637 ierr = PetscFree(dlens);CHKERRQ(ierr); 3638 3639 } else { /* call == MAT_REUSE_MATRIX */ 3640 M = *newmat; 3641 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3642 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3643 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3644 /* 3645 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3646 rather than the slower MatSetValues(). 3647 */ 3648 M->was_assembled = PETSC_TRUE; 3649 M->assembled = PETSC_FALSE; 3650 } 3651 3652 /* (5) Set values of Msub to *newmat */ 3653 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3654 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3655 3656 jj = aij->j; 3657 aa = aij->a; 3658 for (i=0; i<m; i++) { 3659 row = rstart + i; 3660 nz = ii[i+1] - ii[i]; 3661 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3662 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3663 jj += nz; aa += nz; 3664 } 3665 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3666 3667 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3668 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3669 3670 ierr = PetscFree(colsub);CHKERRQ(ierr); 3671 3672 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3673 if (call == MAT_INITIAL_MATRIX) { 3674 *newmat = M; 3675 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3676 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3677 3678 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3679 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3680 3681 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3682 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3683 3684 if (iscol_local) { 3685 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3686 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3687 } 3688 } 3689 PetscFunctionReturn(0); 3690 } 3691 3692 /* 3693 Not great since it makes two copies of the submatrix, first an SeqAIJ 3694 in local and then by concatenating the local matrices the end result. 3695 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3696 3697 Note: This requires a sequential iscol with all indices. 3698 */ 3699 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3700 { 3701 PetscErrorCode ierr; 3702 PetscMPIInt rank,size; 3703 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3704 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3705 Mat M,Mreuse; 3706 MatScalar *aa,*vwork; 3707 MPI_Comm comm; 3708 Mat_SeqAIJ *aij; 3709 PetscBool colflag,allcolumns=PETSC_FALSE; 3710 3711 PetscFunctionBegin; 3712 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3713 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3714 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3715 3716 /* Check for special case: each processor gets entire matrix columns */ 3717 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3718 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3719 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3720 3721 if (call == MAT_REUSE_MATRIX) { 3722 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3723 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3724 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3725 } else { 3726 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3727 } 3728 3729 /* 3730 m - number of local rows 3731 n - number of columns (same on all processors) 3732 rstart - first row in new global matrix generated 3733 */ 3734 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3735 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3736 if (call == MAT_INITIAL_MATRIX) { 3737 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3738 ii = aij->i; 3739 jj = aij->j; 3740 3741 /* 3742 Determine the number of non-zeros in the diagonal and off-diagonal 3743 portions of the matrix in order to do correct preallocation 3744 */ 3745 3746 /* first get start and end of "diagonal" columns */ 3747 if (csize == PETSC_DECIDE) { 3748 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3749 if (mglobal == n) { /* square matrix */ 3750 nlocal = m; 3751 } else { 3752 nlocal = n/size + ((n % size) > rank); 3753 } 3754 } else { 3755 nlocal = csize; 3756 } 3757 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3758 rstart = rend - nlocal; 3759 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3760 3761 /* next, compute all the lengths */ 3762 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3763 olens = dlens + m; 3764 for (i=0; i<m; i++) { 3765 jend = ii[i+1] - ii[i]; 3766 olen = 0; 3767 dlen = 0; 3768 for (j=0; j<jend; j++) { 3769 if (*jj < rstart || *jj >= rend) olen++; 3770 else dlen++; 3771 jj++; 3772 } 3773 olens[i] = olen; 3774 dlens[i] = dlen; 3775 } 3776 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3777 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3778 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3779 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3780 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3781 ierr = PetscFree(dlens);CHKERRQ(ierr); 3782 } else { 3783 PetscInt ml,nl; 3784 3785 M = *newmat; 3786 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3787 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3788 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3789 /* 3790 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3791 rather than the slower MatSetValues(). 3792 */ 3793 M->was_assembled = PETSC_TRUE; 3794 M->assembled = PETSC_FALSE; 3795 } 3796 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3797 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3798 ii = aij->i; 3799 jj = aij->j; 3800 aa = aij->a; 3801 for (i=0; i<m; i++) { 3802 row = rstart + i; 3803 nz = ii[i+1] - ii[i]; 3804 cwork = jj; jj += nz; 3805 vwork = aa; aa += nz; 3806 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3807 } 3808 3809 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3810 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3811 *newmat = M; 3812 3813 /* save submatrix used in processor for next request */ 3814 if (call == MAT_INITIAL_MATRIX) { 3815 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3816 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3817 } 3818 PetscFunctionReturn(0); 3819 } 3820 3821 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3822 { 3823 PetscInt m,cstart, cend,j,nnz,i,d; 3824 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3825 const PetscInt *JJ; 3826 PetscScalar *values; 3827 PetscErrorCode ierr; 3828 PetscBool nooffprocentries; 3829 3830 PetscFunctionBegin; 3831 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3832 3833 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3834 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3835 m = B->rmap->n; 3836 cstart = B->cmap->rstart; 3837 cend = B->cmap->rend; 3838 rstart = B->rmap->rstart; 3839 3840 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3841 3842 #if defined(PETSC_USE_DEBUG) 3843 for (i=0; i<m; i++) { 3844 nnz = Ii[i+1]- Ii[i]; 3845 JJ = J + Ii[i]; 3846 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3847 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3848 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3849 } 3850 #endif 3851 3852 for (i=0; i<m; i++) { 3853 nnz = Ii[i+1]- Ii[i]; 3854 JJ = J + Ii[i]; 3855 nnz_max = PetscMax(nnz_max,nnz); 3856 d = 0; 3857 for (j=0; j<nnz; j++) { 3858 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3859 } 3860 d_nnz[i] = d; 3861 o_nnz[i] = nnz - d; 3862 } 3863 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3864 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3865 3866 if (v) values = (PetscScalar*)v; 3867 else { 3868 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3869 } 3870 3871 for (i=0; i<m; i++) { 3872 ii = i + rstart; 3873 nnz = Ii[i+1]- Ii[i]; 3874 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3875 } 3876 nooffprocentries = B->nooffprocentries; 3877 B->nooffprocentries = PETSC_TRUE; 3878 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3879 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3880 B->nooffprocentries = nooffprocentries; 3881 3882 if (!v) { 3883 ierr = PetscFree(values);CHKERRQ(ierr); 3884 } 3885 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3886 PetscFunctionReturn(0); 3887 } 3888 3889 /*@ 3890 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3891 (the default parallel PETSc format). 3892 3893 Collective on MPI_Comm 3894 3895 Input Parameters: 3896 + B - the matrix 3897 . i - the indices into j for the start of each local row (starts with zero) 3898 . j - the column indices for each local row (starts with zero) 3899 - v - optional values in the matrix 3900 3901 Level: developer 3902 3903 Notes: 3904 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3905 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3906 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3907 3908 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3909 3910 The format which is used for the sparse matrix input, is equivalent to a 3911 row-major ordering.. i.e for the following matrix, the input data expected is 3912 as shown 3913 3914 $ 1 0 0 3915 $ 2 0 3 P0 3916 $ ------- 3917 $ 4 5 6 P1 3918 $ 3919 $ Process0 [P0]: rows_owned=[0,1] 3920 $ i = {0,1,3} [size = nrow+1 = 2+1] 3921 $ j = {0,0,2} [size = 3] 3922 $ v = {1,2,3} [size = 3] 3923 $ 3924 $ Process1 [P1]: rows_owned=[2] 3925 $ i = {0,3} [size = nrow+1 = 1+1] 3926 $ j = {0,1,2} [size = 3] 3927 $ v = {4,5,6} [size = 3] 3928 3929 .keywords: matrix, aij, compressed row, sparse, parallel 3930 3931 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3932 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3933 @*/ 3934 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3935 { 3936 PetscErrorCode ierr; 3937 3938 PetscFunctionBegin; 3939 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3940 PetscFunctionReturn(0); 3941 } 3942 3943 /*@C 3944 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3945 (the default parallel PETSc format). For good matrix assembly performance 3946 the user should preallocate the matrix storage by setting the parameters 3947 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3948 performance can be increased by more than a factor of 50. 3949 3950 Collective on MPI_Comm 3951 3952 Input Parameters: 3953 + B - the matrix 3954 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3955 (same value is used for all local rows) 3956 . d_nnz - array containing the number of nonzeros in the various rows of the 3957 DIAGONAL portion of the local submatrix (possibly different for each row) 3958 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3959 The size of this array is equal to the number of local rows, i.e 'm'. 3960 For matrices that will be factored, you must leave room for (and set) 3961 the diagonal entry even if it is zero. 3962 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3963 submatrix (same value is used for all local rows). 3964 - o_nnz - array containing the number of nonzeros in the various rows of the 3965 OFF-DIAGONAL portion of the local submatrix (possibly different for 3966 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3967 structure. The size of this array is equal to the number 3968 of local rows, i.e 'm'. 3969 3970 If the *_nnz parameter is given then the *_nz parameter is ignored 3971 3972 The AIJ format (also called the Yale sparse matrix format or 3973 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3974 storage. The stored row and column indices begin with zero. 3975 See Users-Manual: ch_mat for details. 3976 3977 The parallel matrix is partitioned such that the first m0 rows belong to 3978 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3979 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3980 3981 The DIAGONAL portion of the local submatrix of a processor can be defined 3982 as the submatrix which is obtained by extraction the part corresponding to 3983 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3984 first row that belongs to the processor, r2 is the last row belonging to 3985 the this processor, and c1-c2 is range of indices of the local part of a 3986 vector suitable for applying the matrix to. This is an mxn matrix. In the 3987 common case of a square matrix, the row and column ranges are the same and 3988 the DIAGONAL part is also square. The remaining portion of the local 3989 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3990 3991 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3992 3993 You can call MatGetInfo() to get information on how effective the preallocation was; 3994 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3995 You can also run with the option -info and look for messages with the string 3996 malloc in them to see if additional memory allocation was needed. 3997 3998 Example usage: 3999 4000 Consider the following 8x8 matrix with 34 non-zero values, that is 4001 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4002 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4003 as follows: 4004 4005 .vb 4006 1 2 0 | 0 3 0 | 0 4 4007 Proc0 0 5 6 | 7 0 0 | 8 0 4008 9 0 10 | 11 0 0 | 12 0 4009 ------------------------------------- 4010 13 0 14 | 15 16 17 | 0 0 4011 Proc1 0 18 0 | 19 20 21 | 0 0 4012 0 0 0 | 22 23 0 | 24 0 4013 ------------------------------------- 4014 Proc2 25 26 27 | 0 0 28 | 29 0 4015 30 0 0 | 31 32 33 | 0 34 4016 .ve 4017 4018 This can be represented as a collection of submatrices as: 4019 4020 .vb 4021 A B C 4022 D E F 4023 G H I 4024 .ve 4025 4026 Where the submatrices A,B,C are owned by proc0, D,E,F are 4027 owned by proc1, G,H,I are owned by proc2. 4028 4029 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4030 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4031 The 'M','N' parameters are 8,8, and have the same values on all procs. 4032 4033 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4034 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4035 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4036 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4037 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4038 matrix, ans [DF] as another SeqAIJ matrix. 4039 4040 When d_nz, o_nz parameters are specified, d_nz storage elements are 4041 allocated for every row of the local diagonal submatrix, and o_nz 4042 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4043 One way to choose d_nz and o_nz is to use the max nonzerors per local 4044 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4045 In this case, the values of d_nz,o_nz are: 4046 .vb 4047 proc0 : dnz = 2, o_nz = 2 4048 proc1 : dnz = 3, o_nz = 2 4049 proc2 : dnz = 1, o_nz = 4 4050 .ve 4051 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4052 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4053 for proc3. i.e we are using 12+15+10=37 storage locations to store 4054 34 values. 4055 4056 When d_nnz, o_nnz parameters are specified, the storage is specified 4057 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4058 In the above case the values for d_nnz,o_nnz are: 4059 .vb 4060 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4061 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4062 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4063 .ve 4064 Here the space allocated is sum of all the above values i.e 34, and 4065 hence pre-allocation is perfect. 4066 4067 Level: intermediate 4068 4069 .keywords: matrix, aij, compressed row, sparse, parallel 4070 4071 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4072 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4073 @*/ 4074 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4075 { 4076 PetscErrorCode ierr; 4077 4078 PetscFunctionBegin; 4079 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4080 PetscValidType(B,1); 4081 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4082 PetscFunctionReturn(0); 4083 } 4084 4085 /*@ 4086 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4087 CSR format the local rows. 4088 4089 Collective on MPI_Comm 4090 4091 Input Parameters: 4092 + comm - MPI communicator 4093 . m - number of local rows (Cannot be PETSC_DECIDE) 4094 . n - This value should be the same as the local size used in creating the 4095 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4096 calculated if N is given) For square matrices n is almost always m. 4097 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4098 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4099 . i - row indices 4100 . j - column indices 4101 - a - matrix values 4102 4103 Output Parameter: 4104 . mat - the matrix 4105 4106 Level: intermediate 4107 4108 Notes: 4109 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4110 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4111 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4112 4113 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4114 4115 The format which is used for the sparse matrix input, is equivalent to a 4116 row-major ordering.. i.e for the following matrix, the input data expected is 4117 as shown 4118 4119 $ 1 0 0 4120 $ 2 0 3 P0 4121 $ ------- 4122 $ 4 5 6 P1 4123 $ 4124 $ Process0 [P0]: rows_owned=[0,1] 4125 $ i = {0,1,3} [size = nrow+1 = 2+1] 4126 $ j = {0,0,2} [size = 3] 4127 $ v = {1,2,3} [size = 3] 4128 $ 4129 $ Process1 [P1]: rows_owned=[2] 4130 $ i = {0,3} [size = nrow+1 = 1+1] 4131 $ j = {0,1,2} [size = 3] 4132 $ v = {4,5,6} [size = 3] 4133 4134 .keywords: matrix, aij, compressed row, sparse, parallel 4135 4136 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4137 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4138 @*/ 4139 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4140 { 4141 PetscErrorCode ierr; 4142 4143 PetscFunctionBegin; 4144 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4145 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4146 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4147 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4148 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4149 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4150 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4151 PetscFunctionReturn(0); 4152 } 4153 4154 /*@C 4155 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4156 (the default parallel PETSc format). For good matrix assembly performance 4157 the user should preallocate the matrix storage by setting the parameters 4158 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4159 performance can be increased by more than a factor of 50. 4160 4161 Collective on MPI_Comm 4162 4163 Input Parameters: 4164 + comm - MPI communicator 4165 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4166 This value should be the same as the local size used in creating the 4167 y vector for the matrix-vector product y = Ax. 4168 . n - This value should be the same as the local size used in creating the 4169 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4170 calculated if N is given) For square matrices n is almost always m. 4171 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4172 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4173 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4174 (same value is used for all local rows) 4175 . d_nnz - array containing the number of nonzeros in the various rows of the 4176 DIAGONAL portion of the local submatrix (possibly different for each row) 4177 or NULL, if d_nz is used to specify the nonzero structure. 4178 The size of this array is equal to the number of local rows, i.e 'm'. 4179 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4180 submatrix (same value is used for all local rows). 4181 - o_nnz - array containing the number of nonzeros in the various rows of the 4182 OFF-DIAGONAL portion of the local submatrix (possibly different for 4183 each row) or NULL, if o_nz is used to specify the nonzero 4184 structure. The size of this array is equal to the number 4185 of local rows, i.e 'm'. 4186 4187 Output Parameter: 4188 . A - the matrix 4189 4190 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4191 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4192 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4193 4194 Notes: 4195 If the *_nnz parameter is given then the *_nz parameter is ignored 4196 4197 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4198 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4199 storage requirements for this matrix. 4200 4201 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4202 processor than it must be used on all processors that share the object for 4203 that argument. 4204 4205 The user MUST specify either the local or global matrix dimensions 4206 (possibly both). 4207 4208 The parallel matrix is partitioned across processors such that the 4209 first m0 rows belong to process 0, the next m1 rows belong to 4210 process 1, the next m2 rows belong to process 2 etc.. where 4211 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4212 values corresponding to [m x N] submatrix. 4213 4214 The columns are logically partitioned with the n0 columns belonging 4215 to 0th partition, the next n1 columns belonging to the next 4216 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4217 4218 The DIAGONAL portion of the local submatrix on any given processor 4219 is the submatrix corresponding to the rows and columns m,n 4220 corresponding to the given processor. i.e diagonal matrix on 4221 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4222 etc. The remaining portion of the local submatrix [m x (N-n)] 4223 constitute the OFF-DIAGONAL portion. The example below better 4224 illustrates this concept. 4225 4226 For a square global matrix we define each processor's diagonal portion 4227 to be its local rows and the corresponding columns (a square submatrix); 4228 each processor's off-diagonal portion encompasses the remainder of the 4229 local matrix (a rectangular submatrix). 4230 4231 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4232 4233 When calling this routine with a single process communicator, a matrix of 4234 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4235 type of communicator, use the construction mechanism 4236 .vb 4237 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4238 .ve 4239 4240 $ MatCreate(...,&A); 4241 $ MatSetType(A,MATMPIAIJ); 4242 $ MatSetSizes(A, m,n,M,N); 4243 $ MatMPIAIJSetPreallocation(A,...); 4244 4245 By default, this format uses inodes (identical nodes) when possible. 4246 We search for consecutive rows with the same nonzero structure, thereby 4247 reusing matrix information to achieve increased efficiency. 4248 4249 Options Database Keys: 4250 + -mat_no_inode - Do not use inodes 4251 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4252 4253 4254 4255 Example usage: 4256 4257 Consider the following 8x8 matrix with 34 non-zero values, that is 4258 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4259 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4260 as follows 4261 4262 .vb 4263 1 2 0 | 0 3 0 | 0 4 4264 Proc0 0 5 6 | 7 0 0 | 8 0 4265 9 0 10 | 11 0 0 | 12 0 4266 ------------------------------------- 4267 13 0 14 | 15 16 17 | 0 0 4268 Proc1 0 18 0 | 19 20 21 | 0 0 4269 0 0 0 | 22 23 0 | 24 0 4270 ------------------------------------- 4271 Proc2 25 26 27 | 0 0 28 | 29 0 4272 30 0 0 | 31 32 33 | 0 34 4273 .ve 4274 4275 This can be represented as a collection of submatrices as 4276 4277 .vb 4278 A B C 4279 D E F 4280 G H I 4281 .ve 4282 4283 Where the submatrices A,B,C are owned by proc0, D,E,F are 4284 owned by proc1, G,H,I are owned by proc2. 4285 4286 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4287 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4288 The 'M','N' parameters are 8,8, and have the same values on all procs. 4289 4290 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4291 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4292 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4293 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4294 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4295 matrix, ans [DF] as another SeqAIJ matrix. 4296 4297 When d_nz, o_nz parameters are specified, d_nz storage elements are 4298 allocated for every row of the local diagonal submatrix, and o_nz 4299 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4300 One way to choose d_nz and o_nz is to use the max nonzerors per local 4301 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4302 In this case, the values of d_nz,o_nz are 4303 .vb 4304 proc0 : dnz = 2, o_nz = 2 4305 proc1 : dnz = 3, o_nz = 2 4306 proc2 : dnz = 1, o_nz = 4 4307 .ve 4308 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4309 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4310 for proc3. i.e we are using 12+15+10=37 storage locations to store 4311 34 values. 4312 4313 When d_nnz, o_nnz parameters are specified, the storage is specified 4314 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4315 In the above case the values for d_nnz,o_nnz are 4316 .vb 4317 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4318 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4319 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4320 .ve 4321 Here the space allocated is sum of all the above values i.e 34, and 4322 hence pre-allocation is perfect. 4323 4324 Level: intermediate 4325 4326 .keywords: matrix, aij, compressed row, sparse, parallel 4327 4328 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4329 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4330 @*/ 4331 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4332 { 4333 PetscErrorCode ierr; 4334 PetscMPIInt size; 4335 4336 PetscFunctionBegin; 4337 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4338 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4339 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4340 if (size > 1) { 4341 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4342 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4343 } else { 4344 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4345 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4346 } 4347 PetscFunctionReturn(0); 4348 } 4349 4350 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4351 { 4352 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4353 PetscBool flg; 4354 PetscErrorCode ierr; 4355 4356 PetscFunctionBegin; 4357 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4358 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4359 if (Ad) *Ad = a->A; 4360 if (Ao) *Ao = a->B; 4361 if (colmap) *colmap = a->garray; 4362 PetscFunctionReturn(0); 4363 } 4364 4365 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4366 { 4367 PetscErrorCode ierr; 4368 PetscInt m,N,i,rstart,nnz,Ii; 4369 PetscInt *indx; 4370 PetscScalar *values; 4371 4372 PetscFunctionBegin; 4373 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4374 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4375 PetscInt *dnz,*onz,sum,bs,cbs; 4376 4377 if (n == PETSC_DECIDE) { 4378 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4379 } 4380 /* Check sum(n) = N */ 4381 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4382 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4383 4384 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4385 rstart -= m; 4386 4387 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4388 for (i=0; i<m; i++) { 4389 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4390 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4391 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4392 } 4393 4394 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4395 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4396 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4397 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4398 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4399 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4400 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4401 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4402 } 4403 4404 /* numeric phase */ 4405 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4406 for (i=0; i<m; i++) { 4407 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4408 Ii = i + rstart; 4409 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4410 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4411 } 4412 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4413 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4414 PetscFunctionReturn(0); 4415 } 4416 4417 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4418 { 4419 PetscErrorCode ierr; 4420 PetscMPIInt rank; 4421 PetscInt m,N,i,rstart,nnz; 4422 size_t len; 4423 const PetscInt *indx; 4424 PetscViewer out; 4425 char *name; 4426 Mat B; 4427 const PetscScalar *values; 4428 4429 PetscFunctionBegin; 4430 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4431 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4432 /* Should this be the type of the diagonal block of A? */ 4433 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4434 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4435 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4436 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4437 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4438 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4439 for (i=0; i<m; i++) { 4440 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4441 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4442 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4443 } 4444 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4445 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4446 4447 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4448 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4449 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4450 sprintf(name,"%s.%d",outfile,rank); 4451 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4452 ierr = PetscFree(name);CHKERRQ(ierr); 4453 ierr = MatView(B,out);CHKERRQ(ierr); 4454 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4455 ierr = MatDestroy(&B);CHKERRQ(ierr); 4456 PetscFunctionReturn(0); 4457 } 4458 4459 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4460 { 4461 PetscErrorCode ierr; 4462 Mat_Merge_SeqsToMPI *merge; 4463 PetscContainer container; 4464 4465 PetscFunctionBegin; 4466 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4467 if (container) { 4468 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4469 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4470 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4471 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4472 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4473 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4474 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4475 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4476 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4477 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4478 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4479 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4480 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4481 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4482 ierr = PetscFree(merge);CHKERRQ(ierr); 4483 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4484 } 4485 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4486 PetscFunctionReturn(0); 4487 } 4488 4489 #include <../src/mat/utils/freespace.h> 4490 #include <petscbt.h> 4491 4492 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4493 { 4494 PetscErrorCode ierr; 4495 MPI_Comm comm; 4496 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4497 PetscMPIInt size,rank,taga,*len_s; 4498 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4499 PetscInt proc,m; 4500 PetscInt **buf_ri,**buf_rj; 4501 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4502 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4503 MPI_Request *s_waits,*r_waits; 4504 MPI_Status *status; 4505 MatScalar *aa=a->a; 4506 MatScalar **abuf_r,*ba_i; 4507 Mat_Merge_SeqsToMPI *merge; 4508 PetscContainer container; 4509 4510 PetscFunctionBegin; 4511 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4512 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4513 4514 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4515 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4516 4517 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4518 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4519 4520 bi = merge->bi; 4521 bj = merge->bj; 4522 buf_ri = merge->buf_ri; 4523 buf_rj = merge->buf_rj; 4524 4525 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4526 owners = merge->rowmap->range; 4527 len_s = merge->len_s; 4528 4529 /* send and recv matrix values */ 4530 /*-----------------------------*/ 4531 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4532 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4533 4534 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4535 for (proc=0,k=0; proc<size; proc++) { 4536 if (!len_s[proc]) continue; 4537 i = owners[proc]; 4538 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4539 k++; 4540 } 4541 4542 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4543 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4544 ierr = PetscFree(status);CHKERRQ(ierr); 4545 4546 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4547 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4548 4549 /* insert mat values of mpimat */ 4550 /*----------------------------*/ 4551 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4552 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4553 4554 for (k=0; k<merge->nrecv; k++) { 4555 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4556 nrows = *(buf_ri_k[k]); 4557 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4558 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4559 } 4560 4561 /* set values of ba */ 4562 m = merge->rowmap->n; 4563 for (i=0; i<m; i++) { 4564 arow = owners[rank] + i; 4565 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4566 bnzi = bi[i+1] - bi[i]; 4567 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4568 4569 /* add local non-zero vals of this proc's seqmat into ba */ 4570 anzi = ai[arow+1] - ai[arow]; 4571 aj = a->j + ai[arow]; 4572 aa = a->a + ai[arow]; 4573 nextaj = 0; 4574 for (j=0; nextaj<anzi; j++) { 4575 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4576 ba_i[j] += aa[nextaj++]; 4577 } 4578 } 4579 4580 /* add received vals into ba */ 4581 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4582 /* i-th row */ 4583 if (i == *nextrow[k]) { 4584 anzi = *(nextai[k]+1) - *nextai[k]; 4585 aj = buf_rj[k] + *(nextai[k]); 4586 aa = abuf_r[k] + *(nextai[k]); 4587 nextaj = 0; 4588 for (j=0; nextaj<anzi; j++) { 4589 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4590 ba_i[j] += aa[nextaj++]; 4591 } 4592 } 4593 nextrow[k]++; nextai[k]++; 4594 } 4595 } 4596 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4597 } 4598 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4599 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4600 4601 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4602 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4603 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4604 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4605 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4606 PetscFunctionReturn(0); 4607 } 4608 4609 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4610 { 4611 PetscErrorCode ierr; 4612 Mat B_mpi; 4613 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4614 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4615 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4616 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4617 PetscInt len,proc,*dnz,*onz,bs,cbs; 4618 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4619 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4620 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4621 MPI_Status *status; 4622 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4623 PetscBT lnkbt; 4624 Mat_Merge_SeqsToMPI *merge; 4625 PetscContainer container; 4626 4627 PetscFunctionBegin; 4628 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4629 4630 /* make sure it is a PETSc comm */ 4631 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4632 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4633 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4634 4635 ierr = PetscNew(&merge);CHKERRQ(ierr); 4636 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4637 4638 /* determine row ownership */ 4639 /*---------------------------------------------------------*/ 4640 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4641 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4642 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4643 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4644 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4645 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4646 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4647 4648 m = merge->rowmap->n; 4649 owners = merge->rowmap->range; 4650 4651 /* determine the number of messages to send, their lengths */ 4652 /*---------------------------------------------------------*/ 4653 len_s = merge->len_s; 4654 4655 len = 0; /* length of buf_si[] */ 4656 merge->nsend = 0; 4657 for (proc=0; proc<size; proc++) { 4658 len_si[proc] = 0; 4659 if (proc == rank) { 4660 len_s[proc] = 0; 4661 } else { 4662 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4663 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4664 } 4665 if (len_s[proc]) { 4666 merge->nsend++; 4667 nrows = 0; 4668 for (i=owners[proc]; i<owners[proc+1]; i++) { 4669 if (ai[i+1] > ai[i]) nrows++; 4670 } 4671 len_si[proc] = 2*(nrows+1); 4672 len += len_si[proc]; 4673 } 4674 } 4675 4676 /* determine the number and length of messages to receive for ij-structure */ 4677 /*-------------------------------------------------------------------------*/ 4678 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4679 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4680 4681 /* post the Irecv of j-structure */ 4682 /*-------------------------------*/ 4683 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4684 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4685 4686 /* post the Isend of j-structure */ 4687 /*--------------------------------*/ 4688 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4689 4690 for (proc=0, k=0; proc<size; proc++) { 4691 if (!len_s[proc]) continue; 4692 i = owners[proc]; 4693 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4694 k++; 4695 } 4696 4697 /* receives and sends of j-structure are complete */ 4698 /*------------------------------------------------*/ 4699 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4700 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4701 4702 /* send and recv i-structure */ 4703 /*---------------------------*/ 4704 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4705 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4706 4707 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4708 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4709 for (proc=0,k=0; proc<size; proc++) { 4710 if (!len_s[proc]) continue; 4711 /* form outgoing message for i-structure: 4712 buf_si[0]: nrows to be sent 4713 [1:nrows]: row index (global) 4714 [nrows+1:2*nrows+1]: i-structure index 4715 */ 4716 /*-------------------------------------------*/ 4717 nrows = len_si[proc]/2 - 1; 4718 buf_si_i = buf_si + nrows+1; 4719 buf_si[0] = nrows; 4720 buf_si_i[0] = 0; 4721 nrows = 0; 4722 for (i=owners[proc]; i<owners[proc+1]; i++) { 4723 anzi = ai[i+1] - ai[i]; 4724 if (anzi) { 4725 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4726 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4727 nrows++; 4728 } 4729 } 4730 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4731 k++; 4732 buf_si += len_si[proc]; 4733 } 4734 4735 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4736 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4737 4738 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4739 for (i=0; i<merge->nrecv; i++) { 4740 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4741 } 4742 4743 ierr = PetscFree(len_si);CHKERRQ(ierr); 4744 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4745 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4746 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4747 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4748 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4749 ierr = PetscFree(status);CHKERRQ(ierr); 4750 4751 /* compute a local seq matrix in each processor */ 4752 /*----------------------------------------------*/ 4753 /* allocate bi array and free space for accumulating nonzero column info */ 4754 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4755 bi[0] = 0; 4756 4757 /* create and initialize a linked list */ 4758 nlnk = N+1; 4759 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4760 4761 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4762 len = ai[owners[rank+1]] - ai[owners[rank]]; 4763 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4764 4765 current_space = free_space; 4766 4767 /* determine symbolic info for each local row */ 4768 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4769 4770 for (k=0; k<merge->nrecv; k++) { 4771 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4772 nrows = *buf_ri_k[k]; 4773 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4774 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4775 } 4776 4777 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4778 len = 0; 4779 for (i=0; i<m; i++) { 4780 bnzi = 0; 4781 /* add local non-zero cols of this proc's seqmat into lnk */ 4782 arow = owners[rank] + i; 4783 anzi = ai[arow+1] - ai[arow]; 4784 aj = a->j + ai[arow]; 4785 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4786 bnzi += nlnk; 4787 /* add received col data into lnk */ 4788 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4789 if (i == *nextrow[k]) { /* i-th row */ 4790 anzi = *(nextai[k]+1) - *nextai[k]; 4791 aj = buf_rj[k] + *nextai[k]; 4792 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4793 bnzi += nlnk; 4794 nextrow[k]++; nextai[k]++; 4795 } 4796 } 4797 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4798 4799 /* if free space is not available, make more free space */ 4800 if (current_space->local_remaining<bnzi) { 4801 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4802 nspacedouble++; 4803 } 4804 /* copy data into free space, then initialize lnk */ 4805 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4806 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4807 4808 current_space->array += bnzi; 4809 current_space->local_used += bnzi; 4810 current_space->local_remaining -= bnzi; 4811 4812 bi[i+1] = bi[i] + bnzi; 4813 } 4814 4815 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4816 4817 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4818 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4819 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4820 4821 /* create symbolic parallel matrix B_mpi */ 4822 /*---------------------------------------*/ 4823 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4824 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4825 if (n==PETSC_DECIDE) { 4826 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4827 } else { 4828 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4829 } 4830 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4831 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4832 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4833 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4834 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4835 4836 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4837 B_mpi->assembled = PETSC_FALSE; 4838 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4839 merge->bi = bi; 4840 merge->bj = bj; 4841 merge->buf_ri = buf_ri; 4842 merge->buf_rj = buf_rj; 4843 merge->coi = NULL; 4844 merge->coj = NULL; 4845 merge->owners_co = NULL; 4846 4847 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4848 4849 /* attach the supporting struct to B_mpi for reuse */ 4850 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4851 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4852 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4853 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4854 *mpimat = B_mpi; 4855 4856 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4857 PetscFunctionReturn(0); 4858 } 4859 4860 /*@C 4861 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4862 matrices from each processor 4863 4864 Collective on MPI_Comm 4865 4866 Input Parameters: 4867 + comm - the communicators the parallel matrix will live on 4868 . seqmat - the input sequential matrices 4869 . m - number of local rows (or PETSC_DECIDE) 4870 . n - number of local columns (or PETSC_DECIDE) 4871 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4872 4873 Output Parameter: 4874 . mpimat - the parallel matrix generated 4875 4876 Level: advanced 4877 4878 Notes: 4879 The dimensions of the sequential matrix in each processor MUST be the same. 4880 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4881 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4882 @*/ 4883 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4884 { 4885 PetscErrorCode ierr; 4886 PetscMPIInt size; 4887 4888 PetscFunctionBegin; 4889 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4890 if (size == 1) { 4891 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4892 if (scall == MAT_INITIAL_MATRIX) { 4893 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4894 } else { 4895 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4896 } 4897 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4898 PetscFunctionReturn(0); 4899 } 4900 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4901 if (scall == MAT_INITIAL_MATRIX) { 4902 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4903 } 4904 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4905 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4906 PetscFunctionReturn(0); 4907 } 4908 4909 /*@ 4910 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4911 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4912 with MatGetSize() 4913 4914 Not Collective 4915 4916 Input Parameters: 4917 + A - the matrix 4918 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4919 4920 Output Parameter: 4921 . A_loc - the local sequential matrix generated 4922 4923 Level: developer 4924 4925 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4926 4927 @*/ 4928 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4929 { 4930 PetscErrorCode ierr; 4931 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4932 Mat_SeqAIJ *mat,*a,*b; 4933 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4934 MatScalar *aa,*ba,*cam; 4935 PetscScalar *ca; 4936 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4937 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4938 PetscBool match; 4939 MPI_Comm comm; 4940 PetscMPIInt size; 4941 4942 PetscFunctionBegin; 4943 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4944 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4945 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4946 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4947 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4948 4949 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4950 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4951 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4952 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4953 aa = a->a; ba = b->a; 4954 if (scall == MAT_INITIAL_MATRIX) { 4955 if (size == 1) { 4956 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4957 PetscFunctionReturn(0); 4958 } 4959 4960 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4961 ci[0] = 0; 4962 for (i=0; i<am; i++) { 4963 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4964 } 4965 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4966 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4967 k = 0; 4968 for (i=0; i<am; i++) { 4969 ncols_o = bi[i+1] - bi[i]; 4970 ncols_d = ai[i+1] - ai[i]; 4971 /* off-diagonal portion of A */ 4972 for (jo=0; jo<ncols_o; jo++) { 4973 col = cmap[*bj]; 4974 if (col >= cstart) break; 4975 cj[k] = col; bj++; 4976 ca[k++] = *ba++; 4977 } 4978 /* diagonal portion of A */ 4979 for (j=0; j<ncols_d; j++) { 4980 cj[k] = cstart + *aj++; 4981 ca[k++] = *aa++; 4982 } 4983 /* off-diagonal portion of A */ 4984 for (j=jo; j<ncols_o; j++) { 4985 cj[k] = cmap[*bj++]; 4986 ca[k++] = *ba++; 4987 } 4988 } 4989 /* put together the new matrix */ 4990 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4991 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4992 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4993 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4994 mat->free_a = PETSC_TRUE; 4995 mat->free_ij = PETSC_TRUE; 4996 mat->nonew = 0; 4997 } else if (scall == MAT_REUSE_MATRIX) { 4998 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4999 ci = mat->i; cj = mat->j; cam = mat->a; 5000 for (i=0; i<am; i++) { 5001 /* off-diagonal portion of A */ 5002 ncols_o = bi[i+1] - bi[i]; 5003 for (jo=0; jo<ncols_o; jo++) { 5004 col = cmap[*bj]; 5005 if (col >= cstart) break; 5006 *cam++ = *ba++; bj++; 5007 } 5008 /* diagonal portion of A */ 5009 ncols_d = ai[i+1] - ai[i]; 5010 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5011 /* off-diagonal portion of A */ 5012 for (j=jo; j<ncols_o; j++) { 5013 *cam++ = *ba++; bj++; 5014 } 5015 } 5016 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5017 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5018 PetscFunctionReturn(0); 5019 } 5020 5021 /*@C 5022 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5023 5024 Not Collective 5025 5026 Input Parameters: 5027 + A - the matrix 5028 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5029 - row, col - index sets of rows and columns to extract (or NULL) 5030 5031 Output Parameter: 5032 . A_loc - the local sequential matrix generated 5033 5034 Level: developer 5035 5036 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5037 5038 @*/ 5039 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5040 { 5041 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5042 PetscErrorCode ierr; 5043 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5044 IS isrowa,iscola; 5045 Mat *aloc; 5046 PetscBool match; 5047 5048 PetscFunctionBegin; 5049 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5050 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5051 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5052 if (!row) { 5053 start = A->rmap->rstart; end = A->rmap->rend; 5054 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5055 } else { 5056 isrowa = *row; 5057 } 5058 if (!col) { 5059 start = A->cmap->rstart; 5060 cmap = a->garray; 5061 nzA = a->A->cmap->n; 5062 nzB = a->B->cmap->n; 5063 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5064 ncols = 0; 5065 for (i=0; i<nzB; i++) { 5066 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5067 else break; 5068 } 5069 imark = i; 5070 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5071 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5072 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5073 } else { 5074 iscola = *col; 5075 } 5076 if (scall != MAT_INITIAL_MATRIX) { 5077 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5078 aloc[0] = *A_loc; 5079 } 5080 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5081 if (!col) { /* attach global id of condensed columns */ 5082 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5083 } 5084 *A_loc = aloc[0]; 5085 ierr = PetscFree(aloc);CHKERRQ(ierr); 5086 if (!row) { 5087 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5088 } 5089 if (!col) { 5090 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5091 } 5092 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5093 PetscFunctionReturn(0); 5094 } 5095 5096 /*@C 5097 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5098 5099 Collective on Mat 5100 5101 Input Parameters: 5102 + A,B - the matrices in mpiaij format 5103 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5104 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5105 5106 Output Parameter: 5107 + rowb, colb - index sets of rows and columns of B to extract 5108 - B_seq - the sequential matrix generated 5109 5110 Level: developer 5111 5112 @*/ 5113 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5114 { 5115 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5116 PetscErrorCode ierr; 5117 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5118 IS isrowb,iscolb; 5119 Mat *bseq=NULL; 5120 5121 PetscFunctionBegin; 5122 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5123 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5124 } 5125 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5126 5127 if (scall == MAT_INITIAL_MATRIX) { 5128 start = A->cmap->rstart; 5129 cmap = a->garray; 5130 nzA = a->A->cmap->n; 5131 nzB = a->B->cmap->n; 5132 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5133 ncols = 0; 5134 for (i=0; i<nzB; i++) { /* row < local row index */ 5135 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5136 else break; 5137 } 5138 imark = i; 5139 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5140 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5141 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5142 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5143 } else { 5144 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5145 isrowb = *rowb; iscolb = *colb; 5146 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5147 bseq[0] = *B_seq; 5148 } 5149 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5150 *B_seq = bseq[0]; 5151 ierr = PetscFree(bseq);CHKERRQ(ierr); 5152 if (!rowb) { 5153 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5154 } else { 5155 *rowb = isrowb; 5156 } 5157 if (!colb) { 5158 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5159 } else { 5160 *colb = iscolb; 5161 } 5162 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5163 PetscFunctionReturn(0); 5164 } 5165 5166 /* 5167 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5168 of the OFF-DIAGONAL portion of local A 5169 5170 Collective on Mat 5171 5172 Input Parameters: 5173 + A,B - the matrices in mpiaij format 5174 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5175 5176 Output Parameter: 5177 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5178 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5179 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5180 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5181 5182 Level: developer 5183 5184 */ 5185 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5186 { 5187 VecScatter_MPI_General *gen_to,*gen_from; 5188 PetscErrorCode ierr; 5189 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5190 Mat_SeqAIJ *b_oth; 5191 VecScatter ctx; 5192 MPI_Comm comm; 5193 PetscMPIInt *rprocs,*sprocs,tag,rank; 5194 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5195 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5196 PetscScalar *b_otha,*bufa,*bufA,*vals; 5197 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5198 MPI_Request *rwaits = NULL,*swaits = NULL; 5199 MPI_Status *sstatus,rstatus; 5200 PetscMPIInt jj,size; 5201 VecScatterType type; 5202 PetscBool mpi1; 5203 5204 PetscFunctionBegin; 5205 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5206 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5207 5208 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5209 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5210 } 5211 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5212 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5213 5214 if (size == 1) { 5215 startsj_s = NULL; 5216 bufa_ptr = NULL; 5217 *B_oth = NULL; 5218 PetscFunctionReturn(0); 5219 } 5220 5221 ctx = a->Mvctx; 5222 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5223 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5224 if (!mpi1) { 5225 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5226 thus create a->Mvctx_mpi1 */ 5227 if (!a->Mvctx_mpi1) { 5228 a->Mvctx_mpi1_flg = PETSC_TRUE; 5229 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5230 } 5231 ctx = a->Mvctx_mpi1; 5232 } 5233 tag = ((PetscObject)ctx)->tag; 5234 5235 gen_to = (VecScatter_MPI_General*)ctx->todata; 5236 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5237 nrecvs = gen_from->n; 5238 nsends = gen_to->n; 5239 5240 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5241 srow = gen_to->indices; /* local row index to be sent */ 5242 sstarts = gen_to->starts; 5243 sprocs = gen_to->procs; 5244 sstatus = gen_to->sstatus; 5245 sbs = gen_to->bs; 5246 rstarts = gen_from->starts; 5247 rprocs = gen_from->procs; 5248 rbs = gen_from->bs; 5249 5250 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5251 if (scall == MAT_INITIAL_MATRIX) { 5252 /* i-array */ 5253 /*---------*/ 5254 /* post receives */ 5255 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5256 for (i=0; i<nrecvs; i++) { 5257 rowlen = rvalues + rstarts[i]*rbs; 5258 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5259 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5260 } 5261 5262 /* pack the outgoing message */ 5263 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5264 5265 sstartsj[0] = 0; 5266 rstartsj[0] = 0; 5267 len = 0; /* total length of j or a array to be sent */ 5268 k = 0; 5269 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5270 for (i=0; i<nsends; i++) { 5271 rowlen = svalues + sstarts[i]*sbs; 5272 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5273 for (j=0; j<nrows; j++) { 5274 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5275 for (l=0; l<sbs; l++) { 5276 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5277 5278 rowlen[j*sbs+l] = ncols; 5279 5280 len += ncols; 5281 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5282 } 5283 k++; 5284 } 5285 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5286 5287 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5288 } 5289 /* recvs and sends of i-array are completed */ 5290 i = nrecvs; 5291 while (i--) { 5292 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5293 } 5294 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5295 ierr = PetscFree(svalues);CHKERRQ(ierr); 5296 5297 /* allocate buffers for sending j and a arrays */ 5298 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5299 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5300 5301 /* create i-array of B_oth */ 5302 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5303 5304 b_othi[0] = 0; 5305 len = 0; /* total length of j or a array to be received */ 5306 k = 0; 5307 for (i=0; i<nrecvs; i++) { 5308 rowlen = rvalues + rstarts[i]*rbs; 5309 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5310 for (j=0; j<nrows; j++) { 5311 b_othi[k+1] = b_othi[k] + rowlen[j]; 5312 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5313 k++; 5314 } 5315 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5316 } 5317 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5318 5319 /* allocate space for j and a arrrays of B_oth */ 5320 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5321 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5322 5323 /* j-array */ 5324 /*---------*/ 5325 /* post receives of j-array */ 5326 for (i=0; i<nrecvs; i++) { 5327 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5328 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5329 } 5330 5331 /* pack the outgoing message j-array */ 5332 k = 0; 5333 for (i=0; i<nsends; i++) { 5334 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5335 bufJ = bufj+sstartsj[i]; 5336 for (j=0; j<nrows; j++) { 5337 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5338 for (ll=0; ll<sbs; ll++) { 5339 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5340 for (l=0; l<ncols; l++) { 5341 *bufJ++ = cols[l]; 5342 } 5343 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5344 } 5345 } 5346 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5347 } 5348 5349 /* recvs and sends of j-array are completed */ 5350 i = nrecvs; 5351 while (i--) { 5352 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5353 } 5354 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5355 } else if (scall == MAT_REUSE_MATRIX) { 5356 sstartsj = *startsj_s; 5357 rstartsj = *startsj_r; 5358 bufa = *bufa_ptr; 5359 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5360 b_otha = b_oth->a; 5361 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5362 5363 /* a-array */ 5364 /*---------*/ 5365 /* post receives of a-array */ 5366 for (i=0; i<nrecvs; i++) { 5367 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5368 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5369 } 5370 5371 /* pack the outgoing message a-array */ 5372 k = 0; 5373 for (i=0; i<nsends; i++) { 5374 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5375 bufA = bufa+sstartsj[i]; 5376 for (j=0; j<nrows; j++) { 5377 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5378 for (ll=0; ll<sbs; ll++) { 5379 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5380 for (l=0; l<ncols; l++) { 5381 *bufA++ = vals[l]; 5382 } 5383 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5384 } 5385 } 5386 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5387 } 5388 /* recvs and sends of a-array are completed */ 5389 i = nrecvs; 5390 while (i--) { 5391 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5392 } 5393 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5394 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5395 5396 if (scall == MAT_INITIAL_MATRIX) { 5397 /* put together the new matrix */ 5398 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5399 5400 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5401 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5402 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5403 b_oth->free_a = PETSC_TRUE; 5404 b_oth->free_ij = PETSC_TRUE; 5405 b_oth->nonew = 0; 5406 5407 ierr = PetscFree(bufj);CHKERRQ(ierr); 5408 if (!startsj_s || !bufa_ptr) { 5409 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5410 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5411 } else { 5412 *startsj_s = sstartsj; 5413 *startsj_r = rstartsj; 5414 *bufa_ptr = bufa; 5415 } 5416 } 5417 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5418 PetscFunctionReturn(0); 5419 } 5420 5421 /*@C 5422 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5423 5424 Not Collective 5425 5426 Input Parameters: 5427 . A - The matrix in mpiaij format 5428 5429 Output Parameter: 5430 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5431 . colmap - A map from global column index to local index into lvec 5432 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5433 5434 Level: developer 5435 5436 @*/ 5437 #if defined(PETSC_USE_CTABLE) 5438 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5439 #else 5440 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5441 #endif 5442 { 5443 Mat_MPIAIJ *a; 5444 5445 PetscFunctionBegin; 5446 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5447 PetscValidPointer(lvec, 2); 5448 PetscValidPointer(colmap, 3); 5449 PetscValidPointer(multScatter, 4); 5450 a = (Mat_MPIAIJ*) A->data; 5451 if (lvec) *lvec = a->lvec; 5452 if (colmap) *colmap = a->colmap; 5453 if (multScatter) *multScatter = a->Mvctx; 5454 PetscFunctionReturn(0); 5455 } 5456 5457 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5458 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5459 #if defined(PETSC_HAVE_MKL_SPARSE) 5460 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5461 #endif 5462 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5463 #if defined(PETSC_HAVE_ELEMENTAL) 5464 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5465 #endif 5466 #if defined(PETSC_HAVE_HYPRE) 5467 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5468 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5469 #endif 5470 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5471 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5472 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5473 5474 /* 5475 Computes (B'*A')' since computing B*A directly is untenable 5476 5477 n p p 5478 ( ) ( ) ( ) 5479 m ( A ) * n ( B ) = m ( C ) 5480 ( ) ( ) ( ) 5481 5482 */ 5483 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5484 { 5485 PetscErrorCode ierr; 5486 Mat At,Bt,Ct; 5487 5488 PetscFunctionBegin; 5489 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5490 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5491 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5492 ierr = MatDestroy(&At);CHKERRQ(ierr); 5493 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5494 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5495 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5496 PetscFunctionReturn(0); 5497 } 5498 5499 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5500 { 5501 PetscErrorCode ierr; 5502 PetscInt m=A->rmap->n,n=B->cmap->n; 5503 Mat Cmat; 5504 5505 PetscFunctionBegin; 5506 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5507 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5508 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5509 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5510 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5511 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5512 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5513 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5514 5515 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5516 5517 *C = Cmat; 5518 PetscFunctionReturn(0); 5519 } 5520 5521 /* ----------------------------------------------------------------*/ 5522 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5523 { 5524 PetscErrorCode ierr; 5525 5526 PetscFunctionBegin; 5527 if (scall == MAT_INITIAL_MATRIX) { 5528 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5529 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5530 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5531 } 5532 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5533 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5534 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5535 PetscFunctionReturn(0); 5536 } 5537 5538 /*MC 5539 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5540 5541 Options Database Keys: 5542 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5543 5544 Level: beginner 5545 5546 .seealso: MatCreateAIJ() 5547 M*/ 5548 5549 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5550 { 5551 Mat_MPIAIJ *b; 5552 PetscErrorCode ierr; 5553 PetscMPIInt size; 5554 5555 PetscFunctionBegin; 5556 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5557 5558 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5559 B->data = (void*)b; 5560 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5561 B->assembled = PETSC_FALSE; 5562 B->insertmode = NOT_SET_VALUES; 5563 b->size = size; 5564 5565 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5566 5567 /* build cache for off array entries formed */ 5568 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5569 5570 b->donotstash = PETSC_FALSE; 5571 b->colmap = 0; 5572 b->garray = 0; 5573 b->roworiented = PETSC_TRUE; 5574 5575 /* stuff used for matrix vector multiply */ 5576 b->lvec = NULL; 5577 b->Mvctx = NULL; 5578 5579 /* stuff for MatGetRow() */ 5580 b->rowindices = 0; 5581 b->rowvalues = 0; 5582 b->getrowactive = PETSC_FALSE; 5583 5584 /* flexible pointer used in CUSP/CUSPARSE classes */ 5585 b->spptr = NULL; 5586 5587 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5588 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5589 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5590 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5591 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5592 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5593 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5594 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5595 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5596 #if defined(PETSC_HAVE_MKL_SPARSE) 5597 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5598 #endif 5599 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5601 #if defined(PETSC_HAVE_ELEMENTAL) 5602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5603 #endif 5604 #if defined(PETSC_HAVE_HYPRE) 5605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5606 #endif 5607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5612 #if defined(PETSC_HAVE_HYPRE) 5613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5614 #endif 5615 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5616 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5617 PetscFunctionReturn(0); 5618 } 5619 5620 /*@C 5621 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5622 and "off-diagonal" part of the matrix in CSR format. 5623 5624 Collective on MPI_Comm 5625 5626 Input Parameters: 5627 + comm - MPI communicator 5628 . m - number of local rows (Cannot be PETSC_DECIDE) 5629 . n - This value should be the same as the local size used in creating the 5630 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5631 calculated if N is given) For square matrices n is almost always m. 5632 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5633 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5634 . i - row indices for "diagonal" portion of matrix 5635 . j - column indices 5636 . a - matrix values 5637 . oi - row indices for "off-diagonal" portion of matrix 5638 . oj - column indices 5639 - oa - matrix values 5640 5641 Output Parameter: 5642 . mat - the matrix 5643 5644 Level: advanced 5645 5646 Notes: 5647 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5648 must free the arrays once the matrix has been destroyed and not before. 5649 5650 The i and j indices are 0 based 5651 5652 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5653 5654 This sets local rows and cannot be used to set off-processor values. 5655 5656 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5657 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5658 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5659 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5660 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5661 communication if it is known that only local entries will be set. 5662 5663 .keywords: matrix, aij, compressed row, sparse, parallel 5664 5665 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5666 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5667 @*/ 5668 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5669 { 5670 PetscErrorCode ierr; 5671 Mat_MPIAIJ *maij; 5672 5673 PetscFunctionBegin; 5674 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5675 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5676 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5677 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5678 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5679 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5680 maij = (Mat_MPIAIJ*) (*mat)->data; 5681 5682 (*mat)->preallocated = PETSC_TRUE; 5683 5684 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5685 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5686 5687 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5688 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5689 5690 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5691 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5692 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5693 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5694 5695 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5696 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5697 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5698 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5699 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5700 PetscFunctionReturn(0); 5701 } 5702 5703 /* 5704 Special version for direct calls from Fortran 5705 */ 5706 #include <petsc/private/fortranimpl.h> 5707 5708 /* Change these macros so can be used in void function */ 5709 #undef CHKERRQ 5710 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5711 #undef SETERRQ2 5712 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5713 #undef SETERRQ3 5714 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5715 #undef SETERRQ 5716 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5717 5718 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5719 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5720 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5721 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5722 #else 5723 #endif 5724 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5725 { 5726 Mat mat = *mmat; 5727 PetscInt m = *mm, n = *mn; 5728 InsertMode addv = *maddv; 5729 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5730 PetscScalar value; 5731 PetscErrorCode ierr; 5732 5733 MatCheckPreallocated(mat,1); 5734 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5735 5736 #if defined(PETSC_USE_DEBUG) 5737 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5738 #endif 5739 { 5740 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5741 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5742 PetscBool roworiented = aij->roworiented; 5743 5744 /* Some Variables required in the macro */ 5745 Mat A = aij->A; 5746 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5747 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5748 MatScalar *aa = a->a; 5749 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5750 Mat B = aij->B; 5751 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5752 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5753 MatScalar *ba = b->a; 5754 5755 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5756 PetscInt nonew = a->nonew; 5757 MatScalar *ap1,*ap2; 5758 5759 PetscFunctionBegin; 5760 for (i=0; i<m; i++) { 5761 if (im[i] < 0) continue; 5762 #if defined(PETSC_USE_DEBUG) 5763 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5764 #endif 5765 if (im[i] >= rstart && im[i] < rend) { 5766 row = im[i] - rstart; 5767 lastcol1 = -1; 5768 rp1 = aj + ai[row]; 5769 ap1 = aa + ai[row]; 5770 rmax1 = aimax[row]; 5771 nrow1 = ailen[row]; 5772 low1 = 0; 5773 high1 = nrow1; 5774 lastcol2 = -1; 5775 rp2 = bj + bi[row]; 5776 ap2 = ba + bi[row]; 5777 rmax2 = bimax[row]; 5778 nrow2 = bilen[row]; 5779 low2 = 0; 5780 high2 = nrow2; 5781 5782 for (j=0; j<n; j++) { 5783 if (roworiented) value = v[i*n+j]; 5784 else value = v[i+j*m]; 5785 if (in[j] >= cstart && in[j] < cend) { 5786 col = in[j] - cstart; 5787 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5788 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5789 } else if (in[j] < 0) continue; 5790 #if defined(PETSC_USE_DEBUG) 5791 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 5792 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5793 #endif 5794 else { 5795 if (mat->was_assembled) { 5796 if (!aij->colmap) { 5797 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5798 } 5799 #if defined(PETSC_USE_CTABLE) 5800 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5801 col--; 5802 #else 5803 col = aij->colmap[in[j]] - 1; 5804 #endif 5805 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5806 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5807 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5808 col = in[j]; 5809 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5810 B = aij->B; 5811 b = (Mat_SeqAIJ*)B->data; 5812 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5813 rp2 = bj + bi[row]; 5814 ap2 = ba + bi[row]; 5815 rmax2 = bimax[row]; 5816 nrow2 = bilen[row]; 5817 low2 = 0; 5818 high2 = nrow2; 5819 bm = aij->B->rmap->n; 5820 ba = b->a; 5821 } 5822 } else col = in[j]; 5823 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5824 } 5825 } 5826 } else if (!aij->donotstash) { 5827 if (roworiented) { 5828 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5829 } else { 5830 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5831 } 5832 } 5833 } 5834 } 5835 PetscFunctionReturnVoid(); 5836 } 5837 5838