#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
   the type also automatically switches over to use inode routines when enough inodes exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto
ok2; 113 } 114 } 115 ok2:; 116 } 117 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 122 { 123 PetscErrorCode ierr; 124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 125 PetscBool cong; 126 127 PetscFunctionBegin; 128 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 129 if (Y->assembled && cong) { 130 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 131 } else { 132 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 133 } 134 PetscFunctionReturn(0); 135 } 136 137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 138 { 139 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 140 PetscErrorCode ierr; 141 PetscInt i,rstart,nrows,*rows; 142 143 PetscFunctionBegin; 144 *zrows = NULL; 145 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 146 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 147 for (i=0; i<nrows; i++) rows[i] += rstart; 148 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 153 { 154 PetscErrorCode ierr; 155 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 156 PetscInt i,n,*garray = aij->garray; 157 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 158 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 159 PetscReal *work; 160 161 PetscFunctionBegin; 162 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 163 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 164 if (type == NORM_2) { 165 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 166 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 167 } 168 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 169 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 170 } 171 } else if (type == NORM_1) { 172 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 173 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 174 } 175 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 176 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 177 } 178 } else if (type == NORM_INFINITY) { 179 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 180 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 181 } 182 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 183 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 184 } 185 186 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 187 if (type == NORM_INFINITY) { 188 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 189 } else { 190 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 191 } 192 ierr = PetscFree(work);CHKERRQ(ierr); 193 if (type == NORM_2) { 194 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 195 } 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 200 { 201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 202 IS sis,gis; 203 PetscErrorCode ierr; 204 const PetscInt *isis,*igis; 205 PetscInt n,*iis,nsis,ngis,rstart,i; 206 207 PetscFunctionBegin; 208 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 209 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 210 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 211 ierr 
= ISGetSize(sis,&nsis);CHKERRQ(ierr); 212 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 213 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 214 215 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 216 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 217 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 218 n = ngis + nsis; 219 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 220 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 221 for (i=0; i<n; i++) iis[i] += rstart; 222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 223 224 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 225 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 226 ierr = ISDestroy(&sis);CHKERRQ(ierr); 227 ierr = ISDestroy(&gis);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 /* 232 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 233 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 234 235 Only for square matrices 236 237 Used by a preconditioner, hence PETSC_EXTERN 238 */ 239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 240 { 241 PetscMPIInt rank,size; 242 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 243 PetscErrorCode ierr; 244 Mat mat; 245 Mat_SeqAIJ *gmata; 246 PetscMPIInt tag; 247 MPI_Status status; 248 PetscBool aij; 249 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 254 if (!rank) { 255 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 256 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 257 } 258 if (reuse == MAT_INITIAL_MATRIX) { 259 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 260 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 261 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 262 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 263 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 264 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 265 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 266 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 267 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 268 269 rowners[0] = 0; 270 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 271 rstart = rowners[rank]; 272 rend = rowners[rank+1]; 273 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 274 if (!rank) { 275 gmata = (Mat_SeqAIJ*) gmat->data; 276 /* send row lengths to all processors */ 277 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 278 for (i=1; i<size; i++) { 279 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 280 } 281 /* determine number diagonal and off-diagonal counts */ 282 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 283 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 284 jj = 0; 285 for (i=0; i<m; i++) { 286 for (j=0; j<dlens[i]; j++) { 287 if (gmata->j[jj] < rstart) ld[i]++; 288 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 289 jj++; 290 } 291 } 292 /* send column indices to other processes */ 293 for (i=1; i<size; i++) { 294 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 295 ierr = 
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 296 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 } 298 299 /* send numerical values to other processes */ 300 for (i=1; i<size; i++) { 301 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 302 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 303 } 304 gmataa = gmata->a; 305 gmataj = gmata->j; 306 307 } else { 308 /* receive row lengths */ 309 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* receive column indices */ 311 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 312 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 313 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 314 /* determine number diagonal and off-diagonal counts */ 315 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 316 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 317 jj = 0; 318 for (i=0; i<m; i++) { 319 for (j=0; j<dlens[i]; j++) { 320 if (gmataj[jj] < rstart) ld[i]++; 321 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 322 jj++; 323 } 324 } 325 /* receive numerical values */ 326 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 327 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 328 } 329 /* set preallocation */ 330 for (i=0; i<m; i++) { 331 dlens[i] -= olens[i]; 332 } 333 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 334 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 335 336 for (i=0; i<m; i++) { 337 dlens[i] += olens[i]; 338 } 339 cnt = 0; 340 for (i=0; i<m; i++) { 341 row = rstart + i; 342 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 343 cnt += dlens[i]; 344 } 345 if (rank) { 346 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 347 } 348 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 349 ierr = PetscFree(rowners);CHKERRQ(ierr); 350 351 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 352 353 *inmat = mat; 354 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 355 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 356 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 357 mat = *inmat; 358 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 359 if (!rank) { 360 /* send numerical values to other processes */ 361 gmata = (Mat_SeqAIJ*) gmat->data; 362 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 363 gmataa = gmata->a; 364 for (i=1; i<size; i++) { 365 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 366 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 367 } 368 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 369 } else { 370 /* receive numerical values from process 0*/ 371 nz = Ad->nz + Ao->nz; 372 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 373 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 374 } 375 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 376 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 377 ad = Ad->a; 378 ao = Ao->a; 379 if (mat->rmap->n) { 380 i = 0; 381 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 for (i=1; i<mat->rmap->n; i++) { 
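      /* Each incoming row of values is ordered by global column: ld[i] off-diagonal entries left of the
         diagonal block, then the diagonal-block entries, then the trailing off-diagonal entries. Copy the
         trailing part of the previous row together with the leading part of this row into B's storage,
         then this row's diagonal-block values into A's storage. */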
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order-N integer array), but access is fast.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1)  low1 = 0; \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value; \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      } \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col; \
      ap1[_i] = value; \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]
= value; \ 479 goto b_noinsert; \ 480 } \ 481 } \ 482 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 484 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 485 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 486 N = nrow2++ - 1; b->nz++; high2++; \ 487 /* shift up all the later entries in this row */ \ 488 for (ii=N; ii>=_i; ii--) { \ 489 rp2[ii+1] = rp2[ii]; \ 490 ap2[ii+1] = ap2[ii]; \ 491 } \ 492 rp2[_i] = col; \ 493 ap2[_i] = value; \ 494 B->nonzerostate++; \ 495 b_noinsert: ; \ 496 bilen[row] = nrow2; \ 497 } 498 499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 500 { 501 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 502 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 503 PetscErrorCode ierr; 504 PetscInt l,*garray = mat->garray,diag; 505 506 PetscFunctionBegin; 507 /* code only works for square matrices A */ 508 509 /* find size of row to the left of the diagonal part */ 510 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 511 row = row - diag; 512 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 513 if (garray[b->j[b->i[row]+l]] > diag) break; 514 } 515 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* diagonal part */ 518 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 519 520 /* right of diagonal part */ 521 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 526 { 527 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 528 PetscScalar value; 529 PetscErrorCode ierr; 530 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 531 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 532 PetscBool roworiented = aij->roworiented; 533 534 /* Some Variables required in the macro */ 535 Mat A = aij->A; 536 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 537 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 538 MatScalar *aa = a->a; 539 PetscBool ignorezeroentries = a->ignorezeroentries; 540 Mat B = aij->B; 541 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 542 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 543 MatScalar *ba = b->a; 544 545 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 546 PetscInt nonew; 547 MatScalar *ap1,*ap2; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (im[i] < 0) continue; 552 #if defined(PETSC_USE_DEBUG) 553 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 554 #endif 555 if (im[i] >= rstart && im[i] < rend) { 556 row = im[i] - rstart; 557 lastcol1 = -1; 558 rp1 = aj + ai[row]; 559 ap1 = aa + ai[row]; 560 rmax1 = aimax[row]; 561 nrow1 = ailen[row]; 562 low1 = 0; 563 high1 = nrow1; 564 lastcol2 = -1; 565 rp2 = bj + bi[row]; 566 ap2 = ba + bi[row]; 567 rmax2 = bimax[row]; 568 nrow2 = bilen[row]; 569 low2 = 0; 570 high2 = nrow2; 571 572 for (j=0; j<n; j++) { 573 if (roworiented) value = v[i*n+j]; 574 else value = v[i+j*m]; 
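          /* columns in the locally owned range [cstart,cend) are inserted into the diagonal block A with a
             local column index; all other columns are routed to the off-diagonal block B */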
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
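    For illustration (hypothetical layout): if this process owns columns [cstart,cend) = [3,6) and a row of the
    input CSR has global columns {1,3,5,8}, then {3,5} are stored in the diagonal block A with local column
    indices {0,2}, while {1,8} are stored in the off-diagonal block B, which keeps global column indices at
    this stage.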
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A    = aij->A; /* diagonal part of the matrix */
  Mat        B    = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ*)mat->data;
  Mat        A     = aij->A; /* diagonal part of the matrix */
  Mat        B     = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point.
*/ 695 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 696 PetscScalar *aa = a->a,*ba = b->a; 697 698 PetscFunctionBegin; 699 /* Iterate over all rows of the matrix */ 700 for (j=0; j<am; j++) { 701 dnz_row = onz_row = 0; 702 rowstart_offd = full_offd_i[j]; 703 rowstart_diag = full_diag_i[j]; 704 /* Iterate over all non-zero columns of the current row */ 705 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 706 /* If column is in the diagonal */ 707 if (mat_j[col] >= cstart && mat_j[col] < cend) { 708 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 709 aa[rowstart_diag+dnz_row] = mat_a[col]; 710 dnz_row++; 711 } else { /* off-diagonal entries */ 712 bj[rowstart_offd+onz_row] = mat_j[col]; 713 ba[rowstart_offd+onz_row] = mat_a[col]; 714 onz_row++; 715 } 716 } 717 ailen[j] = dnz_row; 718 bilen[j] = onz_row; 719 } 720 PetscFunctionReturn(0); 721 } 722 723 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 724 { 725 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 726 PetscErrorCode ierr; 727 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 728 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 729 730 PetscFunctionBegin; 731 for (i=0; i<m; i++) { 732 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 733 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 734 if (idxm[i] >= rstart && idxm[i] < rend) { 735 row = idxm[i] - rstart; 736 for (j=0; j<n; j++) { 737 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 738 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 739 if (idxn[j] >= cstart && idxn[j] < cend) { 740 col = idxn[j] - cstart; 741 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 742 } else { 743 if (!aij->colmap) { 744 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 745 } 746 #if defined(PETSC_USE_CTABLE) 747 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 748 col--; 749 #else 750 col = aij->colmap[idxn[j]] - 1; 751 #endif 752 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 753 else { 754 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 755 } 756 } 757 } 758 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 759 } 760 PetscFunctionReturn(0); 761 } 762 763 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 764 765 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 766 { 767 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 768 PetscErrorCode ierr; 769 PetscInt nstash,reallocs; 770 771 PetscFunctionBegin; 772 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 773 774 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 775 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 776 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 777 PetscFunctionReturn(0); 778 } 779 780 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 781 { 782 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 783 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 784 PetscErrorCode ierr; 785 PetscMPIInt n; 786 PetscInt i,j,rstart,ncols,flg; 787 PetscInt *row,*col; 788 
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       *lrows;
  PetscInt       r, len;
  PetscBool      cong;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr =
VecRestoreArray(b, &bb);CHKERRQ(ierr); 883 } 884 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 885 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 886 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 887 if ((diag != 0.0) && cong) { 888 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 889 } else if (diag != 0.0) { 890 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 891 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 892 for (r = 0; r < len; ++r) { 893 const PetscInt row = lrows[r] + A->rmap->rstart; 894 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 895 } 896 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 897 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 898 } else { 899 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 900 } 901 ierr = PetscFree(lrows);CHKERRQ(ierr); 902 903 /* only change matrix nonzero state if pattern was allowed to be changed */ 904 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 905 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 906 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 907 } 908 PetscFunctionReturn(0); 909 } 910 911 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 912 { 913 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 914 PetscErrorCode ierr; 915 PetscMPIInt n = A->rmap->n; 916 PetscInt i,j,r,m,p = 0,len = 0; 917 PetscInt *lrows,*owners = A->rmap->range; 918 PetscSFNode *rrows; 919 PetscSF sf; 920 const PetscScalar *xx; 921 PetscScalar *bb,*mask; 922 Vec xmask,lmask; 923 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 924 const PetscInt *aj, *ii,*ridx; 925 PetscScalar *aa; 926 927 PetscFunctionBegin; 928 /* Create SF where leaves are input rows and roots are owned rows */ 929 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 930 for (r = 0; r < n; ++r) lrows[r] = -1; 931 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 932 for (r = 0; r < N; ++r) { 933 const PetscInt idx = rows[r]; 934 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 935 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 936 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 937 } 938 rrows[r].rank = p; 939 rrows[r].index = rows[r] - owners[p]; 940 } 941 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 942 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 943 /* Collect flags for rows to be zeroed */ 944 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 945 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 946 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 947 /* Compress and put in row numbers */ 948 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 949 /* zero diagonal part of matrix */ 950 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 951 /* handle off diagonal part of matrix */ 952 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 953 ierr 
= VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 954 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 955 for (i=0; i<len; i++) bb[lrows[i]] = 1; 956 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 957 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 958 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 960 if (x) { 961 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 962 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 964 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 965 } 966 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 967 /* remove zeroed rows of off diagonal matrix */ 968 ii = aij->i; 969 for (i=0; i<len; i++) { 970 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 971 } 972 /* loop over all elements of off process part of matrix zeroing removed columns*/ 973 if (aij->compressedrow.use) { 974 m = aij->compressedrow.nrows; 975 ii = aij->compressedrow.i; 976 ridx = aij->compressedrow.rindex; 977 for (i=0; i<m; i++) { 978 n = ii[i+1] - ii[i]; 979 aj = aij->j + ii[i]; 980 aa = aij->a + ii[i]; 981 982 for (j=0; j<n; j++) { 983 if (PetscAbsScalar(mask[*aj])) { 984 if (b) bb[*ridx] -= *aa*xx[*aj]; 985 *aa = 0.0; 986 } 987 aa++; 988 aj++; 989 } 990 ridx++; 991 } 992 } else { /* do not use compressed row format */ 993 m = l->B->rmap->n; 994 for (i=0; i<m; i++) { 995 n = ii[i+1] - ii[i]; 996 aj = aij->j + ii[i]; 997 aa = aij->a + ii[i]; 998 for (j=0; j<n; j++) { 999 if (PetscAbsScalar(mask[*aj])) { 1000 if (b) bb[i] -= *aa*xx[*aj]; 1001 *aa = 0.0; 1002 } 1003 aa++; 1004 aj++; 1005 } 1006 } 1007 } 1008 if (x) { 1009 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1010 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1011 } 1012 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1013 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1014 ierr = PetscFree(lrows);CHKERRQ(ierr); 1015 1016 /* only change matrix nonzero state if pattern was allowed to be changed */ 1017 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1018 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1019 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1020 } 1021 PetscFunctionReturn(0); 1022 } 1023 1024 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1025 { 1026 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1027 PetscErrorCode ierr; 1028 PetscInt nt; 1029 VecScatter Mvctx = a->Mvctx; 1030 1031 PetscFunctionBegin; 1032 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1033 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1034 1035 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1037 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1038 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1039 PetscFunctionReturn(0); 1040 } 1041 1042 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1043 { 1044 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1045 PetscErrorCode ierr; 1046 1047 PetscFunctionBegin; 1048 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatMultAdd_MPIAIJ(Mat 
A,Vec xx,Vec yy,Vec zz) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1055 PetscErrorCode ierr; 1056 VecScatter Mvctx = a->Mvctx; 1057 1058 PetscFunctionBegin; 1059 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1060 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1062 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1063 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1068 { 1069 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1070 PetscErrorCode ierr; 1071 PetscBool merged; 1072 1073 PetscFunctionBegin; 1074 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1075 /* do nondiagonal part */ 1076 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1077 if (!merged) { 1078 /* send it on its way */ 1079 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1080 /* do local part */ 1081 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1082 /* receive remote parts: note this assumes the values are not actually */ 1083 /* added in yy until the next line, */ 1084 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1085 } else { 1086 /* do local part */ 1087 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1088 /* send it on its way */ 1089 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1090 /* values actually were received in the Begin() but we need to call this nop */ 1091 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1092 } 1093 PetscFunctionReturn(0); 1094 } 1095 1096 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1097 { 1098 MPI_Comm comm; 1099 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1100 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1101 IS Me,Notme; 1102 PetscErrorCode ierr; 1103 PetscInt M,N,first,last,*notme,i; 1104 PetscBool lf; 1105 PetscMPIInt size; 1106 1107 PetscFunctionBegin; 1108 /* Easy test: symmetric diagonal block */ 1109 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1110 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1111 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1112 if (!*f) PetscFunctionReturn(0); 1113 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1114 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1115 if (size == 1) PetscFunctionReturn(0); 1116 1117 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
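     Each process extracts A(Me,Notme), i.e. its owned rows restricted to the columns it does not own, and
     B(Notme,Me), and checks that these two sequential submatrices are transposes of each other.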
*/ 1118 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1119 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1120 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1121 for (i=0; i<first; i++) notme[i] = i; 1122 for (i=last; i<M; i++) notme[i-last+first] = i; 1123 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1124 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1125 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1126 Aoff = Aoffs[0]; 1127 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1128 Boff = Boffs[0]; 1129 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1130 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1131 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1132 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1133 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1134 ierr = PetscFree(notme);CHKERRQ(ierr); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1139 { 1140 PetscErrorCode ierr; 1141 1142 PetscFunctionBegin; 1143 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 1152 PetscFunctionBegin; 1153 /* do nondiagonal part */ 1154 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1155 /* send it on its way */ 1156 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1157 /* do local part */ 1158 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1159 /* receive remote parts */ 1160 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 /* 1165 This only works correctly for square matrices where the subblock A->A is the 1166 diagonal block 1167 */ 1168 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1169 { 1170 PetscErrorCode ierr; 1171 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1172 1173 PetscFunctionBegin; 1174 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1175 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1176 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1181 { 1182 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1183 PetscErrorCode ierr; 1184 1185 PetscFunctionBegin; 1186 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1187 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1192 { 1193 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1194 PetscErrorCode ierr; 1195 1196 PetscFunctionBegin; 1197 #if defined(PETSC_USE_LOG) 1198 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1199 #endif 1200 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1201 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1202 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1203 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1204 #if defined(PETSC_USE_CTABLE) 1205 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1206 #else 1207 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1208 #endif 1209 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1210 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1211 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1212 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1213 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1214 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1215 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1216 1217 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1226 #if defined(PETSC_HAVE_ELEMENTAL) 1227 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1228 #endif 1229 #if defined(PETSC_HAVE_HYPRE) 1230 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1232 #endif 1233 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1239 { 1240 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1241 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1242 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1243 PetscErrorCode ierr; 1244 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1245 int fd; 1246 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1247 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1248 PetscScalar *column_values; 1249 PetscInt message_count,flowcontrolcount; 1250 FILE *file; 1251 1252 PetscFunctionBegin; 1253 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1254 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1255 nz = A->nz + B->nz; 1256 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1257 if (!rank) { 1258 header[0] = MAT_FILE_CLASSID; 1259 header[1] = mat->rmap->N; 1260 header[2] = mat->cmap->N; 1261 1262 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1263 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1264 /* get largest number of rows any processor has */ 1265 rlen = mat->rmap->n; 1266 range = mat->rmap->range; 1267 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1268 } else { 1269 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1270 rlen = mat->rmap->n; 1271 } 1272 1273 /* 
load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* each processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for
(j=B->i[i]; j<B->i[i+1]; j++) { 1336 if (garray[B->j[j]] > cstart) break; 1337 column_values[cnt++] = B->a[j]; 1338 } 1339 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1340 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1341 } 1342 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1343 1344 /* store the column values to the file */ 1345 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1346 if (!rank) { 1347 MPI_Status status; 1348 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1349 for (i=1; i<size; i++) { 1350 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1351 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1352 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1353 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1354 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1355 } 1356 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1357 } else { 1358 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1359 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1360 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1361 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1362 } 1363 ierr = PetscFree(column_values);CHKERRQ(ierr); 1364 1365 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1366 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #include <petscdraw.h> 1371 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1372 { 1373 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1374 PetscErrorCode ierr; 1375 PetscMPIInt rank = aij->rank,size = aij->size; 1376 PetscBool isdraw,iascii,isbinary; 1377 PetscViewer sviewer; 1378 PetscViewerFormat format; 1379 1380 PetscFunctionBegin; 1381 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1382 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1383 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1384 if (iascii) { 1385 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1386 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1387 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1388 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1389 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1390 for (i=0; i<(PetscInt)size; i++) { 1391 nmax = PetscMax(nmax,nz[i]); 1392 nmin = PetscMin(nmin,nz[i]); 1393 navg += nz[i]; 1394 } 1395 ierr = PetscFree(nz);CHKERRQ(ierr); 1396 navg = navg/size; 1397 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } 1400 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1401 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1402 MatInfo info; 1403 
PetscBool inodes; 1404 1405 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1406 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1407 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1408 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1409 if (!inodes) { 1410 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1411 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1412 } else { 1413 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1414 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1415 } 1416 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1418 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1420 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1421 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1422 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1423 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1424 PetscFunctionReturn(0); 1425 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1426 PetscInt inodecount,inodelimit,*inodes; 1427 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1428 if (inodes) { 1429 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1430 } else { 1431 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1432 } 1433 PetscFunctionReturn(0); 1434 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1435 PetscFunctionReturn(0); 1436 } 1437 } else if (isbinary) { 1438 if (size == 1) { 1439 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1440 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1441 } else { 1442 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1443 } 1444 PetscFunctionReturn(0); 1445 } else if (isdraw) { 1446 PetscDraw draw; 1447 PetscBool isnull; 1448 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1449 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1450 if (isnull) PetscFunctionReturn(0); 1451 } 1452 1453 { 1454 /* assemble the entire matrix onto first processor. 
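Note that this requires the first process to hold a copy of the whole matrix, so it is not memory-scalable and is intended only for viewing.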
*/ 1455 Mat A; 1456 Mat_SeqAIJ *Aloc; 1457 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1458 MatScalar *a; 1459 1460 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1461 if (!rank) { 1462 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1463 } else { 1464 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1465 } 1466 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1467 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1468 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1469 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1470 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1471 1472 /* copy over the A part */ 1473 Aloc = (Mat_SeqAIJ*)aij->A->data; 1474 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1475 row = mat->rmap->rstart; 1476 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1477 for (i=0; i<m; i++) { 1478 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1479 row++; 1480 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1481 } 1482 aj = Aloc->j; 1483 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1484 1485 /* copy over the B part */ 1486 Aloc = (Mat_SeqAIJ*)aij->B->data; 1487 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1488 row = mat->rmap->rstart; 1489 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1490 ct = cols; 1491 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1492 for (i=0; i<m; i++) { 1493 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1494 row++; 1495 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1496 } 1497 ierr = PetscFree(ct);CHKERRQ(ierr); 1498 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1499 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1500 /* 1501 Everyone has to call to draw the matrix since the graphics waits are 1502 synchronized across all processors that share the PetscDraw object 1503 */ 1504 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1505 if (!rank) { 1506 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1507 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1508 } 1509 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1510 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1511 ierr = MatDestroy(&A);CHKERRQ(ierr); 1512 } 1513 PetscFunctionReturn(0); 1514 } 1515 1516 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1517 { 1518 PetscErrorCode ierr; 1519 PetscBool iascii,isdraw,issocket,isbinary; 1520 1521 PetscFunctionBegin; 1522 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1523 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1524 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1525 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1526 if (iascii || isdraw || isbinary || issocket) { 1527 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1528 } 1529 PetscFunctionReturn(0); 1530 } 1531 1532 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1533 { 1534 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1535 PetscErrorCode ierr; 1536 Vec bb1 = 0; 1537 PetscBool hasop; 1538 
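/* Parallel SOR is done process-locally (block Jacobi between processes): each outer iteration scatters the current solution into the ghosted vector lvec, folds the off-diagonal contribution into the right-hand side as bb1 = bb - B*x, and then runs SOR on the local diagonal block A */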
1539 PetscFunctionBegin; 1540 if (flag == SOR_APPLY_UPPER) { 1541 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1542 PetscFunctionReturn(0); 1543 } 1544 1545 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1546 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1547 } 1548 1549 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1550 if (flag & SOR_ZERO_INITIAL_GUESS) { 1551 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1552 its--; 1553 } 1554 1555 while (its--) { 1556 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1557 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1558 1559 /* update rhs: bb1 = bb - B*x */ 1560 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1561 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1562 1563 /* local sweep */ 1564 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1565 } 1566 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1567 if (flag & SOR_ZERO_INITIAL_GUESS) { 1568 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1569 its--; 1570 } 1571 while (its--) { 1572 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1573 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1574 1575 /* update rhs: bb1 = bb - B*x */ 1576 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1577 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1578 1579 /* local sweep */ 1580 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1581 } 1582 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1583 if (flag & SOR_ZERO_INITIAL_GUESS) { 1584 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1585 its--; 1586 } 1587 while (its--) { 1588 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1589 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1590 1591 /* update rhs: bb1 = bb - B*x */ 1592 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1593 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1594 1595 /* local sweep */ 1596 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1597 } 1598 } else if (flag & SOR_EISENSTAT) { 1599 Vec xx1; 1600 1601 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1602 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1603 1604 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1605 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 if (!mat->diag) { 1607 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1608 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1609 } 1610 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1611 if (hasop) { 1612 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1613 } else { 1614 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1615 } 1616 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1617 1618 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1619 1620 /* local sweep */ 1621 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1622 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1623 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1624 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1625 1626 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1627 1628 matin->factorerrortype = mat->A->factorerrortype; 1629 PetscFunctionReturn(0); 1630 } 1631 1632 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1633 { 1634 Mat aA,aB,Aperm; 1635 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1636 PetscScalar *aa,*ba; 1637 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1638 PetscSF rowsf,sf; 1639 IS parcolp = NULL; 1640 PetscBool done; 1641 PetscErrorCode ierr; 1642 1643 PetscFunctionBegin; 1644 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1645 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1646 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1647 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1648 1649 /* Invert row permutation to find out where my rows should go */ 1650 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1651 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1652 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1653 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1654 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1655 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1656 1657 /* Invert column permutation to find out where my columns should go */ 1658 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1659 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1660 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1661 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1662 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1663 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1664 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1665 1666 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1667 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1668 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1669 1670 /* Find out where my gcols should go */ 1671 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1672 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1673 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1674 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1675 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1676 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1677 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1678 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1679 1680 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1681 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1682 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1683 for (i=0; i<m; i++) { 1684 PetscInt row = rdest[i],rowner; 1685 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1686 for (j=ai[i]; j<ai[i+1]; j++) { 1687 PetscInt cowner,col = cdest[aj[j]]; 1688 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to 
eliminate this search */ 1689 if (rowner == cowner) dnnz[i]++; 1690 else onnz[i]++; 1691 } 1692 for (j=bi[i]; j<bi[i+1]; j++) { 1693 PetscInt cowner,col = gcdest[bj[j]]; 1694 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1695 if (rowner == cowner) dnnz[i]++; 1696 else onnz[i]++; 1697 } 1698 } 1699 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1700 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1701 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1702 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1703 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1704 1705 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1706 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1707 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1708 for (i=0; i<m; i++) { 1709 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1710 PetscInt j0,rowlen; 1711 rowlen = ai[i+1] - ai[i]; 1712 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1713 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1714 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1715 } 1716 rowlen = bi[i+1] - bi[i]; 1717 for (j0=j=0; j<rowlen; j0=j) { 1718 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1719 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1720 } 1721 } 1722 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1723 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1724 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1725 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1726 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1727 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1728 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1729 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1730 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1731 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1732 *B = Aperm; 1733 PetscFunctionReturn(0); 1734 } 1735 1736 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1737 { 1738 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1739 PetscErrorCode ierr; 1740 1741 PetscFunctionBegin; 1742 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1743 if (ghosts) *ghosts = aij->garray; 1744 PetscFunctionReturn(0); 1745 } 1746 1747 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1748 { 1749 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1750 Mat A = mat->A,B = mat->B; 1751 PetscErrorCode ierr; 1752 PetscReal isend[5],irecv[5]; 1753 1754 PetscFunctionBegin; 1755 info->block_size = 1.0; 1756 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1757 1758 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1759 isend[3] = info->memory; isend[4] = info->mallocs; 1760 1761 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1762 1763 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1764 isend[3] += info->memory; isend[4] += info->mallocs; 1765 if (flag == MAT_LOCAL) { 1766 info->nz_used = isend[0]; 1767 info->nz_allocated = isend[1]; 1768 info->nz_unneeded = isend[2]; 1769 info->memory = isend[3]; 1770 info->mallocs = 
isend[4]; 1771 } else if (flag == MAT_GLOBAL_MAX) { 1772 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1773 1774 info->nz_used = irecv[0]; 1775 info->nz_allocated = irecv[1]; 1776 info->nz_unneeded = irecv[2]; 1777 info->memory = irecv[3]; 1778 info->mallocs = irecv[4]; 1779 } else if (flag == MAT_GLOBAL_SUM) { 1780 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1781 1782 info->nz_used = irecv[0]; 1783 info->nz_allocated = irecv[1]; 1784 info->nz_unneeded = irecv[2]; 1785 info->memory = irecv[3]; 1786 info->mallocs = irecv[4]; 1787 } 1788 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1789 info->fill_ratio_needed = 0; 1790 info->factor_mallocs = 0; 1791 PetscFunctionReturn(0); 1792 } 1793 1794 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1795 { 1796 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1797 PetscErrorCode ierr; 1798 1799 PetscFunctionBegin; 1800 switch (op) { 1801 case MAT_NEW_NONZERO_LOCATIONS: 1802 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1803 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1804 case MAT_KEEP_NONZERO_PATTERN: 1805 case MAT_NEW_NONZERO_LOCATION_ERR: 1806 case MAT_USE_INODES: 1807 case MAT_IGNORE_ZERO_ENTRIES: 1808 MatCheckPreallocated(A,1); 1809 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1810 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1811 break; 1812 case MAT_ROW_ORIENTED: 1813 MatCheckPreallocated(A,1); 1814 a->roworiented = flg; 1815 1816 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1817 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1818 break; 1819 case MAT_NEW_DIAGONALS: 1820 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1821 break; 1822 case MAT_IGNORE_OFF_PROC_ENTRIES: 1823 a->donotstash = flg; 1824 break; 1825 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1826 case MAT_SPD: 1827 case MAT_SYMMETRIC: 1828 case MAT_STRUCTURALLY_SYMMETRIC: 1829 case MAT_HERMITIAN: 1830 case MAT_SYMMETRY_ETERNAL: 1831 break; 1832 case MAT_SUBMAT_SINGLEIS: 1833 A->submat_singleis = flg; 1834 break; 1835 case MAT_STRUCTURE_ONLY: 1836 /* The option is handled directly by MatSetOption() */ 1837 break; 1838 default: 1839 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1840 } 1841 PetscFunctionReturn(0); 1842 } 1843 1844 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1845 { 1846 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1847 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1848 PetscErrorCode ierr; 1849 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1850 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1851 PetscInt *cmap,*idx_p; 1852 1853 PetscFunctionBegin; 1854 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1855 mat->getrowactive = PETSC_TRUE; 1856 1857 if (!mat->rowvalues && (idx || v)) { 1858 /* 1859 allocate enough space to hold information from the longest row. 
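The buffer is allocated only on the first call that requests indices or values and is reused by subsequent MatGetRow() calls.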
1860 */ 1861 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1862 PetscInt max = 1,tmp; 1863 for (i=0; i<matin->rmap->n; i++) { 1864 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1865 if (max < tmp) max = tmp; 1866 } 1867 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1868 } 1869 1870 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1871 lrow = row - rstart; 1872 1873 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1874 if (!v) {pvA = 0; pvB = 0;} 1875 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1876 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1877 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1878 nztot = nzA + nzB; 1879 1880 cmap = mat->garray; 1881 if (v || idx) { 1882 if (nztot) { 1883 /* Sort by increasing column numbers, assuming A and B already sorted */ 1884 PetscInt imark = -1; 1885 if (v) { 1886 *v = v_p = mat->rowvalues; 1887 for (i=0; i<nzB; i++) { 1888 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1889 else break; 1890 } 1891 imark = i; 1892 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1893 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1894 } 1895 if (idx) { 1896 *idx = idx_p = mat->rowindices; 1897 if (imark > -1) { 1898 for (i=0; i<imark; i++) { 1899 idx_p[i] = cmap[cworkB[i]]; 1900 } 1901 } else { 1902 for (i=0; i<nzB; i++) { 1903 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1904 else break; 1905 } 1906 imark = i; 1907 } 1908 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1909 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1910 } 1911 } else { 1912 if (idx) *idx = 0; 1913 if (v) *v = 0; 1914 } 1915 } 1916 *nz = nztot; 1917 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1918 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1919 PetscFunctionReturn(0); 1920 } 1921 1922 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1923 { 1924 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1925 1926 PetscFunctionBegin; 1927 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1928 aij->getrowactive = PETSC_FALSE; 1929 PetscFunctionReturn(0); 1930 } 1931 1932 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1933 { 1934 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1935 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1936 PetscErrorCode ierr; 1937 PetscInt i,j,cstart = mat->cmap->rstart; 1938 PetscReal sum = 0.0; 1939 MatScalar *v; 1940 1941 PetscFunctionBegin; 1942 if (aij->size == 1) { 1943 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1944 } else { 1945 if (type == NORM_FROBENIUS) { 1946 v = amat->a; 1947 for (i=0; i<amat->nz; i++) { 1948 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1949 } 1950 v = bmat->a; 1951 for (i=0; i<bmat->nz; i++) { 1952 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1953 } 1954 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1955 *norm = PetscSqrtReal(*norm); 1956 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1957 } else if (type == NORM_1) { /* max column norm */ 1958 PetscReal *tmp,*tmp2; 1959 PetscInt *jj,*garray = aij->garray; 1960 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1961 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1962 *norm = 0.0; 
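/* accumulate |a_ij| into per-column sums (using global column numbering for both blocks), sum the column sums across all processes, and take the largest */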
1963 v = amat->a; jj = amat->j; 1964 for (j=0; j<amat->nz; j++) { 1965 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1966 } 1967 v = bmat->a; jj = bmat->j; 1968 for (j=0; j<bmat->nz; j++) { 1969 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1970 } 1971 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1972 for (j=0; j<mat->cmap->N; j++) { 1973 if (tmp2[j] > *norm) *norm = tmp2[j]; 1974 } 1975 ierr = PetscFree(tmp);CHKERRQ(ierr); 1976 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1977 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1978 } else if (type == NORM_INFINITY) { /* max row norm */ 1979 PetscReal ntemp = 0.0; 1980 for (j=0; j<aij->A->rmap->n; j++) { 1981 v = amat->a + amat->i[j]; 1982 sum = 0.0; 1983 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1984 sum += PetscAbsScalar(*v); v++; 1985 } 1986 v = bmat->a + bmat->i[j]; 1987 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1988 sum += PetscAbsScalar(*v); v++; 1989 } 1990 if (sum > ntemp) ntemp = sum; 1991 } 1992 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1993 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1994 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1995 } 1996 PetscFunctionReturn(0); 1997 } 1998 1999 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2000 { 2001 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2002 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2003 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol; 2004 PetscErrorCode ierr; 2005 Mat B,A_diag,*B_diag; 2006 MatScalar *array; 2007 2008 PetscFunctionBegin; 2009 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2010 ai = Aloc->i; aj = Aloc->j; 2011 bi = Bloc->i; bj = Bloc->j; 2012 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2013 PetscInt *d_nnz,*g_nnz,*o_nnz; 2014 PetscSFNode *oloc; 2015 PETSC_UNUSED PetscSF sf; 2016 2017 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2018 /* compute d_nnz for preallocation */ 2019 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2020 for (i=0; i<ai[ma]; i++) { 2021 d_nnz[aj[i]]++; 2022 } 2023 /* compute local off-diagonal contributions */ 2024 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2025 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2026 /* map those to global */ 2027 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2028 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2029 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2030 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2031 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2032 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2033 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2034 2035 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2036 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2037 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2038 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2039 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2040 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2041 } else { 2042 B = *matout; 2043 
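/* reusing an existing transpose: its nonzero pattern must already match, so treat any new nonzero allocation as an error */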
ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2044 } 2045 2046 b = (Mat_MPIAIJ*)B->data; 2047 A_diag = a->A; 2048 B_diag = &b->A; 2049 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2050 A_diag_ncol = A_diag->cmap->N; 2051 B_diag_ilen = sub_B_diag->ilen; 2052 B_diag_i = sub_B_diag->i; 2053 2054 /* Set ilen for diagonal of B */ 2055 for (i=0; i<A_diag_ncol; i++) { 2056 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2057 } 2058 2059 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2060 very quickly (=without using MatSetValues), because all writes are local. */ 2061 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2062 2063 /* copy over the B part */ 2064 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2065 array = Bloc->a; 2066 row = A->rmap->rstart; 2067 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2068 cols_tmp = cols; 2069 for (i=0; i<mb; i++) { 2070 ncol = bi[i+1]-bi[i]; 2071 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2072 row++; 2073 array += ncol; cols_tmp += ncol; 2074 } 2075 ierr = PetscFree(cols);CHKERRQ(ierr); 2076 2077 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2080 *matout = B; 2081 } else { 2082 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2083 } 2084 PetscFunctionReturn(0); 2085 } 2086 2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2088 { 2089 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2090 Mat a = aij->A,b = aij->B; 2091 PetscErrorCode ierr; 2092 PetscInt s1,s2,s3; 2093 2094 PetscFunctionBegin; 2095 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2096 if (rr) { 2097 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2098 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2099 /* Overlap communication with computation. 
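The scatter of rr into the ghosted vector lvec is started here and completed below, after the diagonal block has been scaled.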
*/ 2100 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2101 } 2102 if (ll) { 2103 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2104 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2105 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2106 } 2107 /* scale the diagonal block */ 2108 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2109 2110 if (rr) { 2111 /* Do a scatter end and then right scale the off-diagonal block */ 2112 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2113 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2114 } 2115 PetscFunctionReturn(0); 2116 } 2117 2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2119 { 2120 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2121 PetscErrorCode ierr; 2122 2123 PetscFunctionBegin; 2124 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2129 { 2130 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2131 Mat a,b,c,d; 2132 PetscBool flg; 2133 PetscErrorCode ierr; 2134 2135 PetscFunctionBegin; 2136 a = matA->A; b = matA->B; 2137 c = matB->A; d = matB->B; 2138 2139 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2140 if (flg) { 2141 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2142 } 2143 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2148 { 2149 PetscErrorCode ierr; 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2151 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2152 2153 PetscFunctionBegin; 2154 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2155 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2156 /* because of the column compression in the off-processor part of the matrix a->B, 2157 the number of columns in a->B and b->B may be different, hence we cannot call 2158 the MatCopy() directly on the two parts. If need be, we can provide a more 2159 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2160 then copying the submatrices */ 2161 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2162 } else { 2163 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2164 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2165 } 2166 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2171 { 2172 PetscErrorCode ierr; 2173 2174 PetscFunctionBegin; 2175 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 /* 2180 Computes the number of nonzeros per row needed for preallocation when X and Y 2181 have different nonzero structure. 
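The counts are obtained by merging, row by row, the sorted global column lists of X and Y (via the provided local-to-global maps); columns present in both matrices are counted once.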
2182 */ 2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2184 { 2185 PetscInt i,j,k,nzx,nzy; 2186 2187 PetscFunctionBegin; 2188 /* Set the number of nonzeros in the new matrix */ 2189 for (i=0; i<m; i++) { 2190 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2191 nzx = xi[i+1] - xi[i]; 2192 nzy = yi[i+1] - yi[i]; 2193 nnz[i] = 0; 2194 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2195 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2196 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2197 nnz[i]++; 2198 } 2199 for (; k<nzy; k++) nnz[i]++; 2200 } 2201 PetscFunctionReturn(0); 2202 } 2203 2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2206 { 2207 PetscErrorCode ierr; 2208 PetscInt m = Y->rmap->N; 2209 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2210 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2211 2212 PetscFunctionBegin; 2213 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2214 PetscFunctionReturn(0); 2215 } 2216 2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2218 { 2219 PetscErrorCode ierr; 2220 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2221 PetscBLASInt bnz,one=1; 2222 Mat_SeqAIJ *x,*y; 2223 2224 PetscFunctionBegin; 2225 if (str == SAME_NONZERO_PATTERN) { 2226 PetscScalar alpha = a; 2227 x = (Mat_SeqAIJ*)xx->A->data; 2228 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2229 y = (Mat_SeqAIJ*)yy->A->data; 2230 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2231 x = (Mat_SeqAIJ*)xx->B->data; 2232 y = (Mat_SeqAIJ*)yy->B->data; 2233 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2234 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2235 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2236 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2237 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2238 } else { 2239 Mat B; 2240 PetscInt *nnz_d,*nnz_o; 2241 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2242 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2243 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2244 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2245 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2246 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2247 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2248 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2249 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2250 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2251 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2252 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2253 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2254 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2255 } 2256 PetscFunctionReturn(0); 2257 } 2258 2259 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2260 2261 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2262 { 2263 #if defined(PETSC_USE_COMPLEX) 2264 PetscErrorCode ierr; 2265 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2266 2267 PetscFunctionBegin; 2268 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2269 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2270 #else 2271 PetscFunctionBegin; 2272 #endif 2273 PetscFunctionReturn(0); 2274 } 2275 2276 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2277 { 2278 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2279 PetscErrorCode ierr; 2280 2281 PetscFunctionBegin; 2282 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2283 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2284 PetscFunctionReturn(0); 2285 } 2286 2287 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2288 { 2289 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2290 PetscErrorCode ierr; 2291 2292 PetscFunctionBegin; 2293 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2294 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2295 PetscFunctionReturn(0); 2296 } 2297 2298 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2299 { 2300 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2301 PetscErrorCode ierr; 2302 PetscInt i,*idxb = 0; 2303 PetscScalar *va,*vb; 2304 Vec vtmp; 2305 2306 PetscFunctionBegin; 2307 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2308 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2309 if (idx) { 2310 for (i=0; i<A->rmap->n; i++) { 2311 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2312 } 2313 } 2314 2315 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2316 if (idx) { 2317 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2318 } 2319 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2320 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2321 2322 for (i=0; i<A->rmap->n; i++) { 2323 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2324 va[i] = vb[i]; 2325 if (idx) idx[i] = a->garray[idxb[i]]; 2326 } 2327 } 2328 2329 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2330 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2331 ierr = PetscFree(idxb);CHKERRQ(ierr); 2332 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2333 PetscFunctionReturn(0); 2334 } 2335 2336 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2337 { 2338 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2339 PetscErrorCode ierr; 2340 PetscInt i,*idxb = 0; 2341 PetscScalar *va,*vb; 2342 Vec vtmp; 2343 2344 PetscFunctionBegin; 2345 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2346 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2347 if (idx) { 2348 for (i=0; i<A->rmap->n; i++) { 2349 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2350 } 2351 } 2352 2353 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2354 if (idx) { 2355 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2356 } 2357 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2358 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2359 2360 for (i=0; i<A->rmap->n; i++) { 2361 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2362 va[i] = vb[i]; 2363 if (idx) idx[i] = a->garray[idxb[i]]; 2364 } 2365 } 2366 2367 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2368 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2369 ierr = PetscFree(idxb);CHKERRQ(ierr); 2370 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2371 PetscFunctionReturn(0); 2372 } 2373 2374 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2375 { 2376 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2377 PetscInt n = A->rmap->n; 2378 PetscInt cstart = A->cmap->rstart; 2379 PetscInt *cmap = mat->garray; 2380 PetscInt *diagIdx, *offdiagIdx; 2381 Vec diagV, offdiagV; 2382 PetscScalar *a, *diagA, *offdiagA; 2383 PetscInt r; 2384 PetscErrorCode ierr; 2385 2386 PetscFunctionBegin; 2387
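/* compute row minima of the diagonal and off-diagonal blocks separately, then merge them below, translating block-local column indices to global indices */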
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2388 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2389 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2390 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2391 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2392 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2393 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2394 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2395 for (r = 0; r < n; ++r) { 2396 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2397 a[r] = diagA[r]; 2398 idx[r] = cstart + diagIdx[r]; 2399 } else { 2400 a[r] = offdiagA[r]; 2401 idx[r] = cmap[offdiagIdx[r]]; 2402 } 2403 } 2404 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2405 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2406 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2407 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2408 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2409 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2410 PetscFunctionReturn(0); 2411 } 2412 2413 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2414 { 2415 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2416 PetscInt n = A->rmap->n; 2417 PetscInt cstart = A->cmap->rstart; 2418 PetscInt *cmap = mat->garray; 2419 PetscInt *diagIdx, *offdiagIdx; 2420 Vec diagV, offdiagV; 2421 PetscScalar *a, *diagA, *offdiagA; 2422 PetscInt r; 2423 PetscErrorCode ierr; 2424 2425 PetscFunctionBegin; 2426 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2427 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2428 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2429 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2430 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2431 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2432 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2433 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2434 for (r = 0; r < n; ++r) { 2435 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2436 a[r] = diagA[r]; 2437 idx[r] = cstart + diagIdx[r]; 2438 } else { 2439 a[r] = offdiagA[r]; 2440 idx[r] = cmap[offdiagIdx[r]]; 2441 } 2442 } 2443 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2444 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2445 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2446 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2447 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2448 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2449 PetscFunctionReturn(0); 2450 } 2451 2452 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2453 { 2454 PetscErrorCode ierr; 2455 Mat *dummy; 2456 2457 PetscFunctionBegin; 2458 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2459 *newmat = *dummy; 2460 ierr = PetscFree(dummy);CHKERRQ(ierr); 2461 PetscFunctionReturn(0); 2462 } 2463 2464 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2465 { 2466 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2467 PetscErrorCode ierr; 2468 2469 PetscFunctionBegin; 2470 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2471 A->factorerrortype = a->A->factorerrortype; 2472 PetscFunctionReturn(0); 2473 } 2474 2475 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2476 { 2477 PetscErrorCode ierr; 2478 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2479 2480 PetscFunctionBegin; 2481
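/* fill both local blocks with random entries and reassemble the parallel matrix */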
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2482 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2483 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2484 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2485 PetscFunctionReturn(0); 2486 } 2487 2488 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2489 { 2490 PetscFunctionBegin; 2491 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2492 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2493 PetscFunctionReturn(0); 2494 } 2495 2496 /*@ 2497 MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap 2498 2499 Collective on Mat 2500 2501 Input Parameters: 2502 + A - the matrix 2503 - sc - PETSC_TRUE to use the scalable algorithm (the default is not to use it) 2504 2505 Level: advanced 2506 2507 @*/ 2508 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2509 { 2510 PetscErrorCode ierr; 2511 2512 PetscFunctionBegin; 2513 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2514 PetscFunctionReturn(0); 2515 } 2516 2517 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2518 { 2519 PetscErrorCode ierr; 2520 PetscBool sc = PETSC_FALSE,flg; 2521 2522 PetscFunctionBegin; 2523 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2524 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2525 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2526 if (flg) { 2527 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2528 } 2529 ierr = PetscOptionsTail();CHKERRQ(ierr); 2530 PetscFunctionReturn(0); 2531 } 2532 2533 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2534 { 2535 PetscErrorCode ierr; 2536 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2537 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2538 2539 PetscFunctionBegin; 2540 if (!Y->preallocated) { 2541 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2542 } else if (!aij->nz) { 2543 PetscInt nonew = aij->nonew; 2544 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2545 aij->nonew = nonew; 2546 } 2547 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2548 PetscFunctionReturn(0); 2549 } 2550 2551 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2552 { 2553 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2554 PetscErrorCode ierr; 2555 2556 PetscFunctionBegin; 2557 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2558 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2559 if (d) { 2560 PetscInt rstart; 2561 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2562 *d += rstart; 2563 2564 } 2565 PetscFunctionReturn(0); 2566 } 2567 2568 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2569 { 2570 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2571 PetscErrorCode ierr; 2572 2573 PetscFunctionBegin; 2574 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2575 PetscFunctionReturn(0); 2576 } 2577 2578 /* -------------------------------------------------------------------*/ 2579 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2580 MatGetRow_MPIAIJ, 2581
MatRestoreRow_MPIAIJ, 2582 MatMult_MPIAIJ, 2583 /* 4*/ MatMultAdd_MPIAIJ, 2584 MatMultTranspose_MPIAIJ, 2585 MatMultTransposeAdd_MPIAIJ, 2586 0, 2587 0, 2588 0, 2589 /*10*/ 0, 2590 0, 2591 0, 2592 MatSOR_MPIAIJ, 2593 MatTranspose_MPIAIJ, 2594 /*15*/ MatGetInfo_MPIAIJ, 2595 MatEqual_MPIAIJ, 2596 MatGetDiagonal_MPIAIJ, 2597 MatDiagonalScale_MPIAIJ, 2598 MatNorm_MPIAIJ, 2599 /*20*/ MatAssemblyBegin_MPIAIJ, 2600 MatAssemblyEnd_MPIAIJ, 2601 MatSetOption_MPIAIJ, 2602 MatZeroEntries_MPIAIJ, 2603 /*24*/ MatZeroRows_MPIAIJ, 2604 0, 2605 0, 2606 0, 2607 0, 2608 /*29*/ MatSetUp_MPIAIJ, 2609 0, 2610 0, 2611 MatGetDiagonalBlock_MPIAIJ, 2612 0, 2613 /*34*/ MatDuplicate_MPIAIJ, 2614 0, 2615 0, 2616 0, 2617 0, 2618 /*39*/ MatAXPY_MPIAIJ, 2619 MatCreateSubMatrices_MPIAIJ, 2620 MatIncreaseOverlap_MPIAIJ, 2621 MatGetValues_MPIAIJ, 2622 MatCopy_MPIAIJ, 2623 /*44*/ MatGetRowMax_MPIAIJ, 2624 MatScale_MPIAIJ, 2625 MatShift_MPIAIJ, 2626 MatDiagonalSet_MPIAIJ, 2627 MatZeroRowsColumns_MPIAIJ, 2628 /*49*/ MatSetRandom_MPIAIJ, 2629 0, 2630 0, 2631 0, 2632 0, 2633 /*54*/ MatFDColoringCreate_MPIXAIJ, 2634 0, 2635 MatSetUnfactored_MPIAIJ, 2636 MatPermute_MPIAIJ, 2637 0, 2638 /*59*/ MatCreateSubMatrix_MPIAIJ, 2639 MatDestroy_MPIAIJ, 2640 MatView_MPIAIJ, 2641 0, 2642 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2643 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2644 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2645 0, 2646 0, 2647 0, 2648 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2649 MatGetRowMinAbs_MPIAIJ, 2650 0, 2651 0, 2652 0, 2653 0, 2654 /*75*/ MatFDColoringApply_AIJ, 2655 MatSetFromOptions_MPIAIJ, 2656 0, 2657 0, 2658 MatFindZeroDiagonals_MPIAIJ, 2659 /*80*/ 0, 2660 0, 2661 0, 2662 /*83*/ MatLoad_MPIAIJ, 2663 MatIsSymmetric_MPIAIJ, 2664 0, 2665 0, 2666 0, 2667 0, 2668 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2669 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2670 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2671 MatPtAP_MPIAIJ_MPIAIJ, 2672 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2673 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2674 0, 2675 0, 2676 0, 2677 0, 2678 /*99*/ 0, 2679 0, 2680 0, 2681 MatConjugate_MPIAIJ, 2682 0, 2683 /*104*/MatSetValuesRow_MPIAIJ, 2684 MatRealPart_MPIAIJ, 2685 MatImaginaryPart_MPIAIJ, 2686 0, 2687 0, 2688 /*109*/0, 2689 0, 2690 MatGetRowMin_MPIAIJ, 2691 0, 2692 MatMissingDiagonal_MPIAIJ, 2693 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2694 0, 2695 MatGetGhosts_MPIAIJ, 2696 0, 2697 0, 2698 /*119*/0, 2699 0, 2700 0, 2701 0, 2702 MatGetMultiProcBlock_MPIAIJ, 2703 /*124*/MatFindNonzeroRows_MPIAIJ, 2704 MatGetColumnNorms_MPIAIJ, 2705 MatInvertBlockDiagonal_MPIAIJ, 2706 MatInvertVariableBlockDiagonal_MPIAIJ, 2707 MatCreateSubMatricesMPI_MPIAIJ, 2708 /*129*/0, 2709 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2710 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2711 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2712 0, 2713 /*134*/0, 2714 0, 2715 MatRARt_MPIAIJ_MPIAIJ, 2716 0, 2717 0, 2718 /*139*/MatSetBlockSizes_MPIAIJ, 2719 0, 2720 0, 2721 MatFDColoringSetUp_MPIXAIJ, 2722 MatFindOffBlockDiagonalEntries_MPIAIJ, 2723 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2724 }; 2725 2726 /* ----------------------------------------------------------------------------------------*/ 2727 2728 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2729 { 2730 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2731 PetscErrorCode ierr; 2732 2733 PetscFunctionBegin; 2734 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2735 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2736 PetscFunctionReturn(0); 2737 } 2738 2739 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2740 { 2741 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2742 PetscErrorCode ierr; 2743 2744 PetscFunctionBegin; 2745 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2746 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2747 PetscFunctionReturn(0); 2748 } 2749 2750 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2751 { 2752 Mat_MPIAIJ *b; 2753 PetscErrorCode ierr; 2754 2755 PetscFunctionBegin; 2756 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2757 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2758 b = (Mat_MPIAIJ*)B->data; 2759 2760 #if defined(PETSC_USE_CTABLE) 2761 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2762 #else 2763 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2764 #endif 2765 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2766 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2767 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2768 2769 /* Because the off-diagonal block B may have been resized, we simply destroy it and create a new one each time */ 2770 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2771 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2772 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2773 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2774 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2775 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2776 2777 if (!B->preallocated) { 2778 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2779 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2780 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2781 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2782 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2783 } 2784 2785 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2786 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2787 B->preallocated = PETSC_TRUE; 2788 B->was_assembled = PETSC_FALSE; 2789 B->assembled = PETSC_FALSE; 2790 PetscFunctionReturn(0); 2791 } 2792 2793 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2794 { 2795 Mat_MPIAIJ *b; 2796 PetscErrorCode ierr; 2797 2798 PetscFunctionBegin; 2799 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2800 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2801 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2802 b = (Mat_MPIAIJ*)B->data; 2803 2804 #if defined(PETSC_USE_CTABLE) 2805 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2806 #else 2807 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2808 #endif 2809 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2810 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2811 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2812 2813 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2814 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2815 B->preallocated = PETSC_TRUE; 2816 B->was_assembled = PETSC_FALSE; 2817 B->assembled = PETSC_FALSE; 2818 PetscFunctionReturn(0); 2819 } 2820 2821 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2822 { 2823 Mat mat; 2824 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2825 PetscErrorCode ierr; 2826 2827 PetscFunctionBegin; 2828 *newmat = 0; 2829 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2830 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2831 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2832 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
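/* copy the function table and per-object state, then duplicate the layouts, column map, ghost array, scatters and both local blocks of the old matrix */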
2833 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2834 a = (Mat_MPIAIJ*)mat->data; 2835 2836 mat->ops->destroy = MatDestroy_MPIAIJ; 2837 mat->factortype = matin->factortype; 2838 mat->assembled = PETSC_TRUE; 2839 mat->insertmode = NOT_SET_VALUES; 2840 mat->preallocated = PETSC_TRUE; 2841 2842 a->size = oldmat->size; 2843 a->rank = oldmat->rank; 2844 a->donotstash = oldmat->donotstash; 2845 a->roworiented = oldmat->roworiented; 2846 a->rowindices = 0; 2847 a->rowvalues = 0; 2848 a->getrowactive = PETSC_FALSE; 2849 2850 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2851 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2852 2853 if (oldmat->colmap) { 2854 #if defined(PETSC_USE_CTABLE) 2855 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2856 #else 2857 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2858 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2859 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2860 #endif 2861 } else a->colmap = 0; 2862 if (oldmat->garray) { 2863 PetscInt len; 2864 len = oldmat->B->cmap->n; 2865 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2866 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2867 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2868 } else a->garray = 0; 2869 2870 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2871 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2872 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2873 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2874 2875 if (oldmat->Mvctx_mpi1) { 2876 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2877 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2878 } 2879 2880 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2881 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2882 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2883 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2884 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2885 *newmat = mat; 2886 PetscFunctionReturn(0); 2887 } 2888 2889 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2890 { 2891 PetscBool isbinary, ishdf5; 2892 PetscErrorCode ierr; 2893 2894 PetscFunctionBegin; 2895 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2896 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2897 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2898 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2899 if (isbinary) { 2900 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2901 } else if (ishdf5) { 2902 #if defined(PETSC_HAVE_HDF5) 2903 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2904 #else 2905 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2906 #endif 2907 } else { 2908 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2909 } 
2910 PetscFunctionReturn(0); 2911 } 2912 2913 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2914 { 2915 PetscScalar *vals,*svals; 2916 MPI_Comm comm; 2917 PetscErrorCode ierr; 2918 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2919 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2920 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2921 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2922 PetscInt cend,cstart,n,*rowners; 2923 int fd; 2924 PetscInt bs = newMat->rmap->bs; 2925 2926 PetscFunctionBegin; 2927 /* force binary viewer to load .info file if it has not yet done so */ 2928 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2929 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2930 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2931 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2932 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2933 if (!rank) { 2934 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2935 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2936 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2937 } 2938 2939 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2940 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2941 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2942 if (bs < 0) bs = 1; 2943 2944 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2945 M = header[1]; N = header[2]; 2946 2947 /* If global sizes are set, check if they are consistent with that given in the file */ 2948 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2949 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2950 2951 /* determine ownership of all (block) rows */ 2952 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2953 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2954 else m = newMat->rmap->n; /* Set by user */ 2955 2956 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2957 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2958 2959 /* First process needs enough room for process with most rows */ 2960 if (!rank) { 2961 mmax = rowners[1]; 2962 for (i=2; i<=size; i++) { 2963 mmax = PetscMax(mmax, rowners[i]); 2964 } 2965 } else mmax = -1; /* unused, but compilers complain */ 2966 2967 rowners[0] = 0; 2968 for (i=2; i<=size; i++) { 2969 rowners[i] += rowners[i-1]; 2970 } 2971 rstart = rowners[rank]; 2972 rend = rowners[rank+1]; 2973 2974 /* distribute row lengths to all processors */ 2975 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2976 if (!rank) { 2977 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2978 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2979 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2980 for (j=0; j<m; j++) { 2981 procsnz[0] += ourlens[j]; 2982 } 2983 for (i=1; i<size; i++) { 2984 ierr = 
PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2985 /* calculate the number of nonzeros on each processor */ 2986 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2987 procsnz[i] += rowlengths[j]; 2988 } 2989 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2990 } 2991 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2992 } else { 2993 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2994 } 2995 2996 if (!rank) { 2997 /* determine max buffer needed and allocate it */ 2998 maxnz = 0; 2999 for (i=0; i<size; i++) { 3000 maxnz = PetscMax(maxnz,procsnz[i]); 3001 } 3002 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3003 3004 /* read in my part of the matrix column indices */ 3005 nz = procsnz[0]; 3006 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3007 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3008 3009 /* read in every one elses and ship off */ 3010 for (i=1; i<size; i++) { 3011 nz = procsnz[i]; 3012 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3013 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3014 } 3015 ierr = PetscFree(cols);CHKERRQ(ierr); 3016 } else { 3017 /* determine buffer space needed for message */ 3018 nz = 0; 3019 for (i=0; i<m; i++) { 3020 nz += ourlens[i]; 3021 } 3022 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3023 3024 /* receive message of column indices*/ 3025 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3026 } 3027 3028 /* determine column ownership if matrix is not square */ 3029 if (N != M) { 3030 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3031 else n = newMat->cmap->n; 3032 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3033 cstart = cend - n; 3034 } else { 3035 cstart = rstart; 3036 cend = rend; 3037 n = cend - cstart; 3038 } 3039 3040 /* loop over local rows, determining number of off diagonal entries */ 3041 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3042 jj = 0; 3043 for (i=0; i<m; i++) { 3044 for (j=0; j<ourlens[i]; j++) { 3045 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3046 jj++; 3047 } 3048 } 3049 3050 for (i=0; i<m; i++) { 3051 ourlens[i] -= offlens[i]; 3052 } 3053 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3054 3055 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3056 3057 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3058 3059 for (i=0; i<m; i++) { 3060 ourlens[i] += offlens[i]; 3061 } 3062 3063 if (!rank) { 3064 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3065 3066 /* read in my part of the matrix numerical values */ 3067 nz = procsnz[0]; 3068 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3069 3070 /* insert into matrix */ 3071 jj = rstart; 3072 smycols = mycols; 3073 svals = vals; 3074 for (i=0; i<m; i++) { 3075 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3076 smycols += ourlens[i]; 3077 svals += ourlens[i]; 3078 jj++; 3079 } 3080 3081 /* read in other processors and ship out */ 3082 for (i=1; i<size; i++) { 3083 nz = procsnz[i]; 3084 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3085 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3086 } 3087 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3088 } else { 3089 /* receive numeric values */ 3090 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3091 3092 /* receive message of values*/ 3093 ierr = 
MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3094 3095 /* insert into matrix */ 3096 jj = rstart; 3097 smycols = mycols; 3098 svals = vals; 3099 for (i=0; i<m; i++) { 3100 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3101 smycols += ourlens[i]; 3102 svals += ourlens[i]; 3103 jj++; 3104 } 3105 } 3106 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3107 ierr = PetscFree(vals);CHKERRQ(ierr); 3108 ierr = PetscFree(mycols);CHKERRQ(ierr); 3109 ierr = PetscFree(rowners);CHKERRQ(ierr); 3110 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3111 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3112 PetscFunctionReturn(0); 3113 } 3114 3115 /* Not scalable because of ISAllGather() unless getting all columns. */ 3116 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3117 { 3118 PetscErrorCode ierr; 3119 IS iscol_local; 3120 PetscBool isstride; 3121 PetscMPIInt lisstride=0,gisstride; 3122 3123 PetscFunctionBegin; 3124 /* check if we are grabbing all columns*/ 3125 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3126 3127 if (isstride) { 3128 PetscInt start,len,mstart,mlen; 3129 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3130 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3131 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3132 if (mstart == start && mlen-mstart == len) lisstride = 1; 3133 } 3134 3135 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3136 if (gisstride) { 3137 PetscInt N; 3138 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3139 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3140 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3141 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3142 } else { 3143 PetscInt cbs; 3144 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3145 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3146 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3147 } 3148 3149 *isseq = iscol_local; 3150 PetscFunctionReturn(0); 3151 } 3152 3153 /* 3154 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3155 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3156 3157 Input Parameters: 3158 mat - matrix 3159 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3160 i.e., mat->rstart <= isrow[i] < mat->rend 3161 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3162 i.e., mat->cstart <= iscol[i] < mat->cend 3163 Output Parameter: 3164 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3165 iscol_o - sequential column index set for retrieving mat->B 3166 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3167 */ 3168 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3169 { 3170 PetscErrorCode ierr; 3171 Vec x,cmap; 3172 const PetscInt *is_idx; 3173 PetscScalar *xarray,*cmaparray; 3174 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3175 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3176 Mat B=a->B; 3177 Vec lvec=a->lvec,lcmap; 3178 PetscInt i,cstart,cend,Bn=B->cmap->N; 3179 MPI_Comm comm; 3180 VecScatter Mvctx=a->Mvctx; 3181 3182 
PetscFunctionBegin; 3183 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3184 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3185 3186 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3187 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3188 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3189 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3190 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3191 3192 /* Get start indices */ 3193 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3194 isstart -= ncols; 3195 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3196 3197 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3198 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3199 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3200 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3201 for (i=0; i<ncols; i++) { 3202 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3203 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3204 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3205 } 3206 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3207 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3208 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3209 3210 /* Get iscol_d */ 3211 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3212 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3213 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3214 3215 /* Get isrow_d */ 3216 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3217 rstart = mat->rmap->rstart; 3218 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3219 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3220 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3221 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3222 3223 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3224 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3225 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3226 3227 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3228 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3229 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3230 3231 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3232 3233 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3234 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3235 3236 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3237 /* off-process column indices */ 3238 count = 0; 3239 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3240 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3241 3242 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3243 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3244 for (i=0; i<Bn; i++) { 3245 if (PetscRealPart(xarray[i]) > -1.0) { 3246 idx[count] = i; /* local column index in off-diagonal part B */ 3247 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3248 count++; 3249 } 3250 } 3251 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3252 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3253 3254 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3255 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3256 3257 ierr = PetscFree(idx);CHKERRQ(ierr); 3258 *garray = cmap1; 3259 3260 ierr = VecDestroy(&x);CHKERRQ(ierr); 3261 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3262 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3263 PetscFunctionReturn(0); 3264 } 3265 3266 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3267 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3268 { 3269 PetscErrorCode ierr; 3270 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3271 Mat M = NULL; 3272 MPI_Comm comm; 3273 IS iscol_d,isrow_d,iscol_o; 3274 Mat Asub = NULL,Bsub = NULL; 3275 PetscInt n; 3276 3277 PetscFunctionBegin; 3278 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3279 3280 if (call == MAT_REUSE_MATRIX) { 3281 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3282 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3283 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3284 3285 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3286 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3287 3288 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3289 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3290 3291 /* Update diagonal and off-diagonal portions of submat */ 3292 asub = (Mat_MPIAIJ*)(*submat)->data; 3293 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3294 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3295 if (n) { 3296 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3297 } 3298 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3299 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3300 3301 } else { /* call == MAT_INITIAL_MATRIX) */ 3302 const PetscInt *garray; 3303 PetscInt BsubN; 3304 3305 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3306 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3307 3308 /* Create local submatrices Asub and Bsub */ 3309 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3310 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3311 3312 /* Create submatrix M */ 3313 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3314 3315 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3316 asub = (Mat_MPIAIJ*)M->data; 3317 3318 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3319 n = asub->B->cmap->N; 3320 if (BsubN > n) { 3321 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3322 const PetscInt *idx; 3323 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3324 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3325 3326 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3327 j = 0; 3328 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3329 for (i=0; i<n; i++) { 3330 if (j >= BsubN) break; 3331 while (subgarray[i] > garray[j]) j++; 3332 3333 if (subgarray[i] == garray[j]) { 3334 idx_new[i] = idx[j++]; 3335 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3336 } 3337 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3338 3339 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3340 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3341 3342 } else if (BsubN < n) { 3343 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3344 } 3345 3346 ierr = PetscFree(garray);CHKERRQ(ierr); 3347 *submat = M; 3348 3349 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3350 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3351 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3352 3353 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3354 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3355 3356 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3357 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3358 } 3359 PetscFunctionReturn(0); 3360 } 3361 3362 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3363 { 3364 PetscErrorCode ierr; 3365 IS iscol_local=NULL,isrow_d; 3366 PetscInt csize; 3367 PetscInt n,i,j,start,end; 3368 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3369 MPI_Comm comm; 3370 3371 PetscFunctionBegin; 3372 /* If isrow has same processor distribution as mat, 3373 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3374 if (call == MAT_REUSE_MATRIX) { 3375 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3376 if (isrow_d) { 3377 sameRowDist = PETSC_TRUE; 3378 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3379 } else { 3380 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3381 if (iscol_local) { 3382 sameRowDist = PETSC_TRUE; 3383 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3384 } 3385 } 3386 } else { 3387 /* Check if isrow has same processor distribution as mat */ 3388 sameDist[0] 
= PETSC_FALSE; 3389 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3390 if (!n) { 3391 sameDist[0] = PETSC_TRUE; 3392 } else { 3393 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3394 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3395 if (i >= start && j < end) { 3396 sameDist[0] = PETSC_TRUE; 3397 } 3398 } 3399 3400 /* Check if iscol has same processor distribution as mat */ 3401 sameDist[1] = PETSC_FALSE; 3402 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3403 if (!n) { 3404 sameDist[1] = PETSC_TRUE; 3405 } else { 3406 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3407 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3408 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3409 } 3410 3411 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3412 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3413 sameRowDist = tsameDist[0]; 3414 } 3415 3416 if (sameRowDist) { 3417 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3418 /* isrow and iscol have same processor distribution as mat */ 3419 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3420 PetscFunctionReturn(0); 3421 } else { /* sameRowDist */ 3422 /* isrow has same processor distribution as mat */ 3423 if (call == MAT_INITIAL_MATRIX) { 3424 PetscBool sorted; 3425 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3426 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3427 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3428 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3429 3430 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3431 if (sorted) { 3432 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3433 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3434 PetscFunctionReturn(0); 3435 } 3436 } else { /* call == MAT_REUSE_MATRIX */ 3437 IS iscol_sub; 3438 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3439 if (iscol_sub) { 3440 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3441 PetscFunctionReturn(0); 3442 } 3443 } 3444 } 3445 } 3446 3447 /* General case: iscol -> iscol_local which has global size of iscol */ 3448 if (call == MAT_REUSE_MATRIX) { 3449 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3450 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3451 } else { 3452 if (!iscol_local) { 3453 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3454 } 3455 } 3456 3457 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3458 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3459 3460 if (call == MAT_INITIAL_MATRIX) { 3461 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3462 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3463 } 3464 PetscFunctionReturn(0); 3465 } 3466 3467 /*@C 3468 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3469 and "off-diagonal" part of the matrix in CSR format. 3470 3471 Collective on MPI_Comm 3472 3473 Input Parameters: 3474 + comm - MPI communicator 3475 . 
A - "diagonal" portion of matrix 3476 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3477 - garray - global index of B columns 3478 3479 Output Parameter: 3480 . mat - the matrix, with input A as its local diagonal matrix 3481 Level: advanced 3482 3483 Notes: 3484 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3485 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3486 3487 .seealso: MatCreateMPIAIJWithSplitArrays() 3488 @*/ 3489 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3490 { 3491 PetscErrorCode ierr; 3492 Mat_MPIAIJ *maij; 3493 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3494 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3495 PetscScalar *oa=b->a; 3496 Mat Bnew; 3497 PetscInt m,n,N; 3498 3499 PetscFunctionBegin; 3500 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3501 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3502 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3503 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3504 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3505 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3506 3507 /* Get global columns of mat */ 3508 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3509 3510 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3511 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3512 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3513 maij = (Mat_MPIAIJ*)(*mat)->data; 3514 3515 (*mat)->preallocated = PETSC_TRUE; 3516 3517 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3518 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3519 3520 /* Set A as diagonal portion of *mat */ 3521 maij->A = A; 3522 3523 nz = oi[m]; 3524 for (i=0; i<nz; i++) { 3525 col = oj[i]; 3526 oj[i] = garray[col]; 3527 } 3528 3529 /* Set Bnew as off-diagonal portion of *mat */ 3530 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3531 bnew = (Mat_SeqAIJ*)Bnew->data; 3532 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3533 maij->B = Bnew; 3534 3535 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3536 3537 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3538 b->free_a = PETSC_FALSE; 3539 b->free_ij = PETSC_FALSE; 3540 ierr = MatDestroy(&B);CHKERRQ(ierr); 3541 3542 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3543 bnew->free_a = PETSC_TRUE; 3544 bnew->free_ij = PETSC_TRUE; 3545 3546 /* condense columns of maij->B */ 3547 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3548 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3549 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3550 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3551 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3552 PetscFunctionReturn(0); 3553 } 3554 3555 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3556 
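/*
   A minimal usage sketch for MatCreateMPIAIJWithSeqAIJ() (the names mirror those in
   MatCreateSubMatrix_MPIAIJ_SameRowColDist() above): given a sequential "diagonal" block
   Asub and a sequential "off-diagonal" block Bsub whose columns are described by the
   global map garray, the parallel matrix is assembled with

$     Mat M;
$     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);

   after which Asub and Bsub belong to M and must not be used or destroyed by the caller.
*/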
3557 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3558 { 3559 PetscErrorCode ierr; 3560 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3561 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3562 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3563 Mat M,Msub,B=a->B; 3564 MatScalar *aa; 3565 Mat_SeqAIJ *aij; 3566 PetscInt *garray = a->garray,*colsub,Ncols; 3567 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3568 IS iscol_sub,iscmap; 3569 const PetscInt *is_idx,*cmap; 3570 PetscBool allcolumns=PETSC_FALSE; 3571 MPI_Comm comm; 3572 3573 PetscFunctionBegin; 3574 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3575 3576 if (call == MAT_REUSE_MATRIX) { 3577 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3578 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3579 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3580 3581 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3582 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3583 3584 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3585 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3586 3587 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3588 3589 } else { /* call == MAT_INITIAL_MATRIX) */ 3590 PetscBool flg; 3591 3592 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3593 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3594 3595 /* (1) iscol -> nonscalable iscol_local */ 3596 /* Check for special case: each processor gets entire matrix columns */ 3597 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3598 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3599 if (allcolumns) { 3600 iscol_sub = iscol_local; 3601 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3602 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3603 3604 } else { 3605 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3606 PetscInt *idx,*cmap1,k; 3607 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3608 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3609 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3610 count = 0; 3611 k = 0; 3612 for (i=0; i<Ncols; i++) { 3613 j = is_idx[i]; 3614 if (j >= cstart && j < cend) { 3615 /* diagonal part of mat */ 3616 idx[count] = j; 3617 cmap1[count++] = i; /* column index in submat */ 3618 } else if (Bn) { 3619 /* off-diagonal part of mat */ 3620 if (j == garray[k]) { 3621 idx[count] = j; 3622 cmap1[count++] = i; /* column index in submat */ 3623 } else if (j > garray[k]) { 3624 while (j > garray[k] && k < Bn-1) k++; 3625 if (j == garray[k]) { 3626 idx[count] = j; 3627 cmap1[count++] = i; /* column index in submat */ 3628 } 3629 } 3630 } 3631 } 3632 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3633 3634 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3635 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3636 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3637 3638 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3639 } 3640 3641 /* (3) Create sequential Msub */ 3642 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3643 } 3644 3645 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3646 aij = (Mat_SeqAIJ*)(Msub)->data; 3647 ii = aij->i; 3648 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3649 3650 /* 3651 m - number of local rows 3652 Ncols - number of columns (same on all processors) 3653 rstart - first row in new global matrix generated 3654 */ 3655 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3656 3657 if (call == MAT_INITIAL_MATRIX) { 3658 /* (4) Create parallel newmat */ 3659 PetscMPIInt rank,size; 3660 PetscInt csize; 3661 3662 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3663 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3664 3665 /* 3666 Determine the number of non-zeros in the diagonal and off-diagonal 3667 portions of the matrix in order to do correct preallocation 3668 */ 3669 3670 /* first get start and end of "diagonal" columns */ 3671 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3672 if (csize == PETSC_DECIDE) { 3673 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3674 if (mglobal == Ncols) { /* square matrix */ 3675 nlocal = m; 3676 } else { 3677 nlocal = Ncols/size + ((Ncols % size) > rank); 3678 } 3679 } else { 3680 nlocal = csize; 3681 } 3682 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3683 rstart = rend - nlocal; 3684 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3685 3686 /* next, compute all the lengths */ 3687 jj = aij->j; 3688 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3689 olens = dlens + m; 3690 for (i=0; i<m; i++) { 3691 jend = ii[i+1] - ii[i]; 3692 olen = 0; 3693 dlen = 0; 3694 for (j=0; j<jend; j++) { 3695 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3696 else dlen++; 3697 jj++; 3698 } 3699 olens[i] = olen; 3700 dlens[i] = dlen; 3701 } 3702 3703 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3704 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3705 3706 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3707 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3708 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3709 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3710 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3711 ierr = PetscFree(dlens);CHKERRQ(ierr); 3712 3713 } else { /* call == MAT_REUSE_MATRIX */ 3714 M = *newmat; 3715 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3716 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3717 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3718 /* 3719 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3720 rather than the slower MatSetValues(). 3721 */ 3722 M->was_assembled = PETSC_TRUE; 3723 M->assembled = PETSC_FALSE; 3724 } 3725 3726 /* (5) Set values of Msub to *newmat */ 3727 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3728 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3729 3730 jj = aij->j; 3731 aa = aij->a; 3732 for (i=0; i<m; i++) { 3733 row = rstart + i; 3734 nz = ii[i+1] - ii[i]; 3735 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3736 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3737 jj += nz; aa += nz; 3738 } 3739 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3740 3741 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3742 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3743 3744 ierr = PetscFree(colsub);CHKERRQ(ierr); 3745 3746 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3747 if (call == MAT_INITIAL_MATRIX) { 3748 *newmat = M; 3749 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3750 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3751 3752 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3753 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3754 3755 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3756 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3757 3758 if (iscol_local) { 3759 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3760 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3761 } 3762 } 3763 PetscFunctionReturn(0); 3764 } 3765 3766 /* 3767 Not great since it makes two copies of the submatrix, first an SeqAIJ 3768 in local and then by concatenating the local matrices the end result. 3769 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3770 3771 Note: This requires a sequential iscol with all indices. 
3772 */ 3773 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3774 { 3775 PetscErrorCode ierr; 3776 PetscMPIInt rank,size; 3777 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3778 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3779 Mat M,Mreuse; 3780 MatScalar *aa,*vwork; 3781 MPI_Comm comm; 3782 Mat_SeqAIJ *aij; 3783 PetscBool colflag,allcolumns=PETSC_FALSE; 3784 3785 PetscFunctionBegin; 3786 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3787 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3788 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3789 3790 /* Check for special case: each processor gets entire matrix columns */ 3791 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3792 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3793 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3794 3795 if (call == MAT_REUSE_MATRIX) { 3796 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3797 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3798 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3799 } else { 3800 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3801 } 3802 3803 /* 3804 m - number of local rows 3805 n - number of columns (same on all processors) 3806 rstart - first row in new global matrix generated 3807 */ 3808 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3809 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3810 if (call == MAT_INITIAL_MATRIX) { 3811 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3812 ii = aij->i; 3813 jj = aij->j; 3814 3815 /* 3816 Determine the number of non-zeros in the diagonal and off-diagonal 3817 portions of the matrix in order to do correct preallocation 3818 */ 3819 3820 /* first get start and end of "diagonal" columns */ 3821 if (csize == PETSC_DECIDE) { 3822 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3823 if (mglobal == n) { /* square matrix */ 3824 nlocal = m; 3825 } else { 3826 nlocal = n/size + ((n % size) > rank); 3827 } 3828 } else { 3829 nlocal = csize; 3830 } 3831 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3832 rstart = rend - nlocal; 3833 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3834 3835 /* next, compute all the lengths */ 3836 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3837 olens = dlens + m; 3838 for (i=0; i<m; i++) { 3839 jend = ii[i+1] - ii[i]; 3840 olen = 0; 3841 dlen = 0; 3842 for (j=0; j<jend; j++) { 3843 if (*jj < rstart || *jj >= rend) olen++; 3844 else dlen++; 3845 jj++; 3846 } 3847 olens[i] = olen; 3848 dlens[i] = dlen; 3849 } 3850 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3851 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3852 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3853 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3854 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3855 ierr = PetscFree(dlens);CHKERRQ(ierr); 3856 } else { 3857 PetscInt ml,nl; 3858 3859 M = *newmat; 3860 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3861 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3862 ierr = 
MatZeroEntries(M);CHKERRQ(ierr);
3863 /*
3864 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3865 rather than the slower MatSetValues().
3866 */
3867 M->was_assembled = PETSC_TRUE;
3868 M->assembled = PETSC_FALSE;
3869 }
3870 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3871 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3872 ii = aij->i;
3873 jj = aij->j;
3874 aa = aij->a;
3875 for (i=0; i<m; i++) {
3876 row = rstart + i;
3877 nz = ii[i+1] - ii[i];
3878 cwork = jj; jj += nz;
3879 vwork = aa; aa += nz;
3880 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3881 }
3882
3883 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3884 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3885 *newmat = M;
3886
3887 /* save submatrix used in processor for next request */
3888 if (call == MAT_INITIAL_MATRIX) {
3889 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3890 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3891 }
3892 PetscFunctionReturn(0);
3893 }
3894
3895 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3896 {
3897 PetscInt m,cstart, cend,j,nnz,i,d;
3898 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3899 const PetscInt *JJ;
3900 PetscScalar *values;
3901 PetscErrorCode ierr;
3902 PetscBool nooffprocentries;
3903
3904 PetscFunctionBegin;
3905 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3906
3907 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3908 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3909 m = B->rmap->n;
3910 cstart = B->cmap->rstart;
3911 cend = B->cmap->rend;
3912 rstart = B->rmap->rstart;
3913
3914 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3915
3916 #if defined(PETSC_USE_DEBUG)
3917 for (i=0; i<m && Ii; i++) {
3918 nnz = Ii[i+1]- Ii[i];
3919 JJ = J + Ii[i];
3920 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3921 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3922 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3923 }
3924 #endif
3925
3926 for (i=0; i<m && Ii; i++) {
3927 nnz = Ii[i+1]- Ii[i];
3928 JJ = J + Ii[i];
3929 nnz_max = PetscMax(nnz_max,nnz);
3930 d = 0;
3931 for (j=0; j<nnz; j++) {
3932 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3933 }
3934 d_nnz[i] = d;
3935 o_nnz[i] = nnz - d;
3936 }
3937 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3938 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3939
3940 if (v) values = (PetscScalar*)v;
3941 else {
3942 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3943 }
3944
3945 for (i=0; i<m && Ii; i++) {
3946 ii = i + rstart;
3947 nnz = Ii[i+1]- Ii[i];
3948 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3949 } 3950 nooffprocentries = B->nooffprocentries; 3951 B->nooffprocentries = PETSC_TRUE; 3952 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3953 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3954 B->nooffprocentries = nooffprocentries; 3955 3956 if (!v) { 3957 ierr = PetscFree(values);CHKERRQ(ierr); 3958 } 3959 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3960 PetscFunctionReturn(0); 3961 } 3962 3963 /*@ 3964 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3965 (the default parallel PETSc format). 3966 3967 Collective on MPI_Comm 3968 3969 Input Parameters: 3970 + B - the matrix 3971 . i - the indices into j for the start of each local row (starts with zero) 3972 . j - the column indices for each local row (starts with zero) 3973 - v - optional values in the matrix 3974 3975 Level: developer 3976 3977 Notes: 3978 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3979 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3980 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3981 3982 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3983 3984 The format which is used for the sparse matrix input, is equivalent to a 3985 row-major ordering.. i.e for the following matrix, the input data expected is 3986 as shown 3987 3988 $ 1 0 0 3989 $ 2 0 3 P0 3990 $ ------- 3991 $ 4 5 6 P1 3992 $ 3993 $ Process0 [P0]: rows_owned=[0,1] 3994 $ i = {0,1,3} [size = nrow+1 = 2+1] 3995 $ j = {0,0,2} [size = 3] 3996 $ v = {1,2,3} [size = 3] 3997 $ 3998 $ Process1 [P1]: rows_owned=[2] 3999 $ i = {0,3} [size = nrow+1 = 1+1] 4000 $ j = {0,1,2} [size = 3] 4001 $ v = {4,5,6} [size = 3] 4002 4003 .keywords: matrix, aij, compressed row, sparse, parallel 4004 4005 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4006 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4007 @*/ 4008 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4009 { 4010 PetscErrorCode ierr; 4011 4012 PetscFunctionBegin; 4013 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4014 PetscFunctionReturn(0); 4015 } 4016 4017 /*@C 4018 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4019 (the default parallel PETSc format). For good matrix assembly performance 4020 the user should preallocate the matrix storage by setting the parameters 4021 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4022 performance can be increased by more than a factor of 50. 4023 4024 Collective on MPI_Comm 4025 4026 Input Parameters: 4027 + B - the matrix 4028 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4029 (same value is used for all local rows) 4030 . d_nnz - array containing the number of nonzeros in the various rows of the 4031 DIAGONAL portion of the local submatrix (possibly different for each row) 4032 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4033 The size of this array is equal to the number of local rows, i.e 'm'. 
4034 For matrices that will be factored, you must leave room for (and set)
4035 the diagonal entry even if it is zero.
4036 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4037 submatrix (same value is used for all local rows).
4038 - o_nnz - array containing the number of nonzeros in the various rows of the
4039 OFF-DIAGONAL portion of the local submatrix (possibly different for
4040 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4041 structure. The size of this array is equal to the number
4042 of local rows, i.e., 'm'.
4043
4044 If the *_nnz parameter is given then the *_nz parameter is ignored.
4045
4046 The AIJ format (also called the Yale sparse matrix format or
4047 compressed row storage (CSR)) is fully compatible with standard Fortran 77
4048 storage. The stored row and column indices begin with zero.
4049 See Users-Manual: ch_mat for details.
4050
4051 The parallel matrix is partitioned such that the first m0 rows belong to
4052 process 0, the next m1 rows belong to process 1, the next m2 rows belong
4053 to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4054
4055 The DIAGONAL portion of the local submatrix of a processor can be defined
4056 as the submatrix which is obtained by extracting the part corresponding to
4057 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4058 first row that belongs to the processor, r2 is the last row belonging to
4059 this processor, and c1-c2 is the range of indices of the local part of a
4060 vector suitable for applying the matrix to. This is an mxn matrix. In the
4061 common case of a square matrix, the row and column ranges are the same and
4062 the DIAGONAL part is also square. The remaining portion of the local
4063 submatrix (mxN) constitutes the OFF-DIAGONAL portion.
4064
4065 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4066
4067 You can call MatGetInfo() to get information on how effective the preallocation was;
4068 for example, the fields mallocs, nz_allocated, nz_used, nz_unneeded.
4069 You can also run with the option -info and look for messages with the string
4070 malloc in them to see if additional memory allocation was needed.
4071
4072 Example usage:
4073
4074 Consider the following 8x8 matrix with 34 non-zero values that is
4075 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4076 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4077 as follows:
4078
4079 .vb
4080 1 2 0 | 0 3 0 | 0 4
4081 Proc0 0 5 6 | 7 0 0 | 8 0
4082 9 0 10 | 11 0 0 | 12 0
4083 -------------------------------------
4084 13 0 14 | 15 16 17 | 0 0
4085 Proc1 0 18 0 | 19 20 21 | 0 0
4086 0 0 0 | 22 23 0 | 24 0
4087 -------------------------------------
4088 Proc2 25 26 27 | 0 0 28 | 29 0
4089 30 0 0 | 31 32 33 | 0 34
4090 .ve
4091
4092 This can be represented as a collection of submatrices as:
4093
4094 .vb
4095 A B C
4096 D E F
4097 G H I
4098 .ve
4099
4100 Where the submatrices A,B,C are owned by proc0, D,E,F are
4101 owned by proc1, G,H,I are owned by proc2.
4102
4103 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4104 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4105 The 'M','N' parameters are 8,8, and have the same values on all procs.
4106
4107 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4108 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4109 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4110 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4111 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4112 matrix, and [DF] as another SeqAIJ matrix.
4113
4114 When the d_nz, o_nz parameters are specified, d_nz storage elements are
4115 allocated for every row of the local diagonal submatrix, and o_nz
4116 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4117 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4118 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4119 In this case, the values of d_nz,o_nz are:
4120 .vb
4121 proc0 : d_nz = 2, o_nz = 2
4122 proc1 : d_nz = 3, o_nz = 2
4123 proc2 : d_nz = 1, o_nz = 4
4124 .ve
4125 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4126 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4127 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4128 34 values.
4129
4130 When the d_nnz, o_nnz parameters are specified, the storage is specified
4131 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4132 In the above case the values for d_nnz,o_nnz are:
4133 .vb
4134 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4135 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4136 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4137 .ve
4138 Here the space allocated is the sum of all the above values, i.e., 34, and
4139 hence pre-allocation is perfect.
4140
4141 Level: intermediate
4142
4143 .keywords: matrix, aij, compressed row, sparse, parallel
4144
4145 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4146 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4147 @*/
4148 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4149 {
4150 PetscErrorCode ierr;
4151
4152 PetscFunctionBegin;
4153 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4154 PetscValidType(B,1);
4155 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4156 PetscFunctionReturn(0);
4157 }
4158
4159 /*@
4160 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4161 in standard CSR format.
4162
4163 Collective on MPI_Comm
4164
4165 Input Parameters:
4166 + comm - MPI communicator
4167 . m - number of local rows (Cannot be PETSC_DECIDE)
4168 . n - This value should be the same as the local size used in creating the
4169 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4170 calculated if N is given). For square matrices n is almost always m.
4171 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4172 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4173 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4174 . j - column indices
4175 - a - matrix values
4176
4177 Output Parameter:
4178 . mat - the matrix
4179
4180 Level: intermediate
4181
4182 Notes:
4183 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4184 thus you CANNOT change the matrix entries by changing the values of a[] after you have
4185 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4186
4187 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
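   As a sketch only (assuming each process has already built its local i, j, and a arrays;
   the variable A and the use of ierr here are illustrative), the matrix can then be created
   in one call, letting PETSc determine the global sizes:

$      Mat A;
$      ierr = MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);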
4188 4189 The format which is used for the sparse matrix input, is equivalent to a 4190 row-major ordering.. i.e for the following matrix, the input data expected is 4191 as shown 4192 4193 $ 1 0 0 4194 $ 2 0 3 P0 4195 $ ------- 4196 $ 4 5 6 P1 4197 $ 4198 $ Process0 [P0]: rows_owned=[0,1] 4199 $ i = {0,1,3} [size = nrow+1 = 2+1] 4200 $ j = {0,0,2} [size = 3] 4201 $ v = {1,2,3} [size = 3] 4202 $ 4203 $ Process1 [P1]: rows_owned=[2] 4204 $ i = {0,3} [size = nrow+1 = 1+1] 4205 $ j = {0,1,2} [size = 3] 4206 $ v = {4,5,6} [size = 3] 4207 4208 .keywords: matrix, aij, compressed row, sparse, parallel 4209 4210 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4211 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4212 @*/ 4213 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4214 { 4215 PetscErrorCode ierr; 4216 4217 PetscFunctionBegin; 4218 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4219 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4220 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4221 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4222 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4223 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4224 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4225 PetscFunctionReturn(0); 4226 } 4227 4228 /*@C 4229 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4230 (the default parallel PETSc format). For good matrix assembly performance 4231 the user should preallocate the matrix storage by setting the parameters 4232 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4233 performance can be increased by more than a factor of 50. 4234 4235 Collective on MPI_Comm 4236 4237 Input Parameters: 4238 + comm - MPI communicator 4239 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4240 This value should be the same as the local size used in creating the 4241 y vector for the matrix-vector product y = Ax. 4242 . n - This value should be the same as the local size used in creating the 4243 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4244 calculated if N is given) For square matrices n is almost always m. 4245 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4246 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4247 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4248 (same value is used for all local rows) 4249 . d_nnz - array containing the number of nonzeros in the various rows of the 4250 DIAGONAL portion of the local submatrix (possibly different for each row) 4251 or NULL, if d_nz is used to specify the nonzero structure. 4252 The size of this array is equal to the number of local rows, i.e 'm'. 4253 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4254 submatrix (same value is used for all local rows). 4255 - o_nnz - array containing the number of nonzeros in the various rows of the 4256 OFF-DIAGONAL portion of the local submatrix (possibly different for 4257 each row) or NULL, if o_nz is used to specify the nonzero 4258 structure. 
The size of this array is equal to the number
4259 of local rows, i.e., 'm'.
4260
4261 Output Parameter:
4262 . A - the matrix
4263
4264 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4265 MatXXXXSetPreallocation() paradigm instead of this routine directly.
4266 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4267
4268 Notes:
4269 If the *_nnz parameter is given then the *_nz parameter is ignored.
4270
4271 The m,n,M,N parameters specify the size of the matrix, and its partitioning across
4272 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4273 storage requirements for this matrix.
4274
4275 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4276 processor then it must be used on all processors that share the object for
4277 that argument.
4278
4279 The user MUST specify either the local or global matrix dimensions
4280 (possibly both).
4281
4282 The parallel matrix is partitioned across processors such that the
4283 first m0 rows belong to process 0, the next m1 rows belong to
4284 process 1, the next m2 rows belong to process 2, etc., where
4285 m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4286 values corresponding to an [m x N] submatrix.
4287
4288 The columns are logically partitioned with the n0 columns belonging
4289 to the 0th partition, the next n1 columns belonging to the next
4290 partition, etc., where n0,n1,n2... are the input parameter 'n'.
4291
4292 The DIAGONAL portion of the local submatrix on any given processor
4293 is the submatrix corresponding to the rows and columns m,n
4294 corresponding to the given processor, i.e., the diagonal matrix on
4295 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4296 etc. The remaining portion of the local submatrix [m x (N-n)]
4297 constitutes the OFF-DIAGONAL portion. The example below better
4298 illustrates this concept.
4299
4300 For a square global matrix we define each processor's diagonal portion
4301 to be its local rows and the corresponding columns (a square submatrix);
4302 each processor's off-diagonal portion encompasses the remainder of the
4303 local matrix (a rectangular submatrix).
4304
4305 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4306
4307 When calling this routine with a single process communicator, a matrix of
4308 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4309 type of communicator, use the construction mechanism
4310 .vb
4311 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4312 .ve
4313
4314 $ MatCreate(...,&A);
4315 $ MatSetType(A,MATMPIAIJ);
4316 $ MatSetSizes(A, m,n,M,N);
4317 $ MatMPIAIJSetPreallocation(A,...);
4318
4319 By default, this format uses inodes (identical nodes) when possible.
4320 We search for consecutive rows with the same nonzero structure, thereby
4321 reusing matrix information to achieve increased efficiency.
4322
4323 Options Database Keys:
4324 + -mat_no_inode - Do not use inodes
4325 - -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4326
4327
4328
4329 Example usage:
4330
4331 Consider the following 8x8 matrix with 34 non-zero values that is
4332 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4333 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 4334 as follows 4335 4336 .vb 4337 1 2 0 | 0 3 0 | 0 4 4338 Proc0 0 5 6 | 7 0 0 | 8 0 4339 9 0 10 | 11 0 0 | 12 0 4340 ------------------------------------- 4341 13 0 14 | 15 16 17 | 0 0 4342 Proc1 0 18 0 | 19 20 21 | 0 0 4343 0 0 0 | 22 23 0 | 24 0 4344 ------------------------------------- 4345 Proc2 25 26 27 | 0 0 28 | 29 0 4346 30 0 0 | 31 32 33 | 0 34 4347 .ve 4348 4349 This can be represented as a collection of submatrices as 4350 4351 .vb 4352 A B C 4353 D E F 4354 G H I 4355 .ve 4356 4357 where the submatrices A,B,C are owned by proc0, D,E,F are 4358 owned by proc1, and G,H,I are owned by proc2. 4359 4360 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4361 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4362 The 'M','N' parameters are 8,8, and have the same values on all procs. 4363 4364 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4365 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4366 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4367 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4368 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 4369 matrix and [DF] as another SeqAIJ matrix. 4370 4371 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4372 allocated for every row of the local DIAGONAL submatrix, and o_nz 4373 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4374 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over 4375 the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively. 4376 In this case, the values of d_nz,o_nz are 4377 .vb 4378 proc0 : d_nz = 2, o_nz = 2 4379 proc1 : d_nz = 3, o_nz = 2 4380 proc2 : d_nz = 1, o_nz = 4 4381 .ve 4382 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4383 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4384 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4385 34 values. 4386 4387 When the d_nnz, o_nnz parameters are specified, the storage is specified 4388 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4389 In the above case the values for d_nnz,o_nnz are 4390 .vb 4391 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4392 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4393 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4394 .ve 4395 Here the space allocated is the sum of all the above values, i.e., 34, and 4396 hence the preallocation is exact.
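   As a minimal, illustrative sketch (not a complete program) of assembling the example above, each
   process would call MatCreateAIJ() with its own m and its own d_nnz/o_nnz values from the table
   above; the values shown below are those of proc0, and error checking is omitted for brevity.
.vb
     Mat      A;
     PetscInt m        = 3;              // 3 on proc0 and proc1, 2 on proc2
     PetscInt d_nnz[3] = {2,2,2};        // per-row DIAGONAL counts for this process
     PetscInt o_nnz[3] = {2,2,2};        // per-row OFF-DIAGONAL counts for this process

     MatCreateAIJ(PETSC_COMM_WORLD,m,m,8,8,0,d_nnz,0,o_nnz,&A);
     // ... insert the locally owned rows with MatSetValues(), using global row/column indices ...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve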
4397 4398 Level: intermediate 4399 4400 .keywords: matrix, aij, compressed row, sparse, parallel 4401 4402 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4403 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4404 @*/ 4405 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4406 { 4407 PetscErrorCode ierr; 4408 PetscMPIInt size; 4409 4410 PetscFunctionBegin; 4411 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4412 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4413 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4414 if (size > 1) { 4415 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4416 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4417 } else { 4418 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4419 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4420 } 4421 PetscFunctionReturn(0); 4422 } 4423 4424 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4425 { 4426 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4427 PetscBool flg; 4428 PetscErrorCode ierr; 4429 4430 PetscFunctionBegin; 4431 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4432 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4433 if (Ad) *Ad = a->A; 4434 if (Ao) *Ao = a->B; 4435 if (colmap) *colmap = a->garray; 4436 PetscFunctionReturn(0); 4437 } 4438 4439 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4440 { 4441 PetscErrorCode ierr; 4442 PetscInt m,N,i,rstart,nnz,Ii; 4443 PetscInt *indx; 4444 PetscScalar *values; 4445 4446 PetscFunctionBegin; 4447 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4448 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4449 PetscInt *dnz,*onz,sum,bs,cbs; 4450 4451 if (n == PETSC_DECIDE) { 4452 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4453 } 4454 /* Check sum(n) = N */ 4455 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4456 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4457 4458 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4459 rstart -= m; 4460 4461 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4462 for (i=0; i<m; i++) { 4463 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4464 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4465 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4466 } 4467 4468 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4469 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4470 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4471 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4472 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4473 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4474 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4475 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4476 } 4477 4478 /* numeric phase */ 4479 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4480 for (i=0; i<m; i++) { 4481 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4482 Ii = i + rstart; 4483 ierr = 
MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4484 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4485 } 4486 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4487 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4488 PetscFunctionReturn(0); 4489 } 4490 4491 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4492 { 4493 PetscErrorCode ierr; 4494 PetscMPIInt rank; 4495 PetscInt m,N,i,rstart,nnz; 4496 size_t len; 4497 const PetscInt *indx; 4498 PetscViewer out; 4499 char *name; 4500 Mat B; 4501 const PetscScalar *values; 4502 4503 PetscFunctionBegin; 4504 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4505 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4506 /* Should this be the type of the diagonal block of A? */ 4507 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4508 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4509 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4510 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4511 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4512 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4513 for (i=0; i<m; i++) { 4514 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4515 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4516 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4517 } 4518 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4519 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4520 4521 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4522 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4523 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4524 sprintf(name,"%s.%d",outfile,rank); 4525 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4526 ierr = PetscFree(name);CHKERRQ(ierr); 4527 ierr = MatView(B,out);CHKERRQ(ierr); 4528 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4529 ierr = MatDestroy(&B);CHKERRQ(ierr); 4530 PetscFunctionReturn(0); 4531 } 4532 4533 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4534 { 4535 PetscErrorCode ierr; 4536 Mat_Merge_SeqsToMPI *merge; 4537 PetscContainer container; 4538 4539 PetscFunctionBegin; 4540 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4541 if (container) { 4542 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4543 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4544 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4545 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4546 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4547 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4548 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4549 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4550 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4551 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4552 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4553 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4554 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4555 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4556 ierr = PetscFree(merge);CHKERRQ(ierr); 4557 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4558 } 4559 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4560 PetscFunctionReturn(0); 4561 } 4562 4563 #include <../src/mat/utils/freespace.h> 4564 #include <petscbt.h> 4565 4566 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4567 { 4568 PetscErrorCode ierr; 4569 MPI_Comm 
comm; 4570 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4571 PetscMPIInt size,rank,taga,*len_s; 4572 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4573 PetscInt proc,m; 4574 PetscInt **buf_ri,**buf_rj; 4575 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4576 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4577 MPI_Request *s_waits,*r_waits; 4578 MPI_Status *status; 4579 MatScalar *aa=a->a; 4580 MatScalar **abuf_r,*ba_i; 4581 Mat_Merge_SeqsToMPI *merge; 4582 PetscContainer container; 4583 4584 PetscFunctionBegin; 4585 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4586 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4587 4588 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4589 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4590 4591 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4592 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4593 4594 bi = merge->bi; 4595 bj = merge->bj; 4596 buf_ri = merge->buf_ri; 4597 buf_rj = merge->buf_rj; 4598 4599 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4600 owners = merge->rowmap->range; 4601 len_s = merge->len_s; 4602 4603 /* send and recv matrix values */ 4604 /*-----------------------------*/ 4605 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4606 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4607 4608 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4609 for (proc=0,k=0; proc<size; proc++) { 4610 if (!len_s[proc]) continue; 4611 i = owners[proc]; 4612 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4613 k++; 4614 } 4615 4616 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4617 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4618 ierr = PetscFree(status);CHKERRQ(ierr); 4619 4620 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4621 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4622 4623 /* insert mat values of mpimat */ 4624 /*----------------------------*/ 4625 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4626 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4627 4628 for (k=0; k<merge->nrecv; k++) { 4629 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4630 nrows = *(buf_ri_k[k]); 4631 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4632 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4633 } 4634 4635 /* set values of ba */ 4636 m = merge->rowmap->n; 4637 for (i=0; i<m; i++) { 4638 arow = owners[rank] + i; 4639 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4640 bnzi = bi[i+1] - bi[i]; 4641 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4642 4643 /* add local non-zero vals of this proc's seqmat into ba */ 4644 anzi = ai[arow+1] - ai[arow]; 4645 aj = a->j + ai[arow]; 4646 aa = a->a + ai[arow]; 4647 nextaj = 0; 4648 for (j=0; nextaj<anzi; j++) { 4649 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4650 ba_i[j] += aa[nextaj++]; 4651 } 4652 } 4653 4654 /* add received vals into ba */ 4655 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4656 /* i-th row */ 4657 if (i == *nextrow[k]) { 4658 anzi = *(nextai[k]+1) - *nextai[k]; 4659 aj = buf_rj[k] + *(nextai[k]); 4660 aa = abuf_r[k] + *(nextai[k]); 4661 nextaj = 0; 4662 for 
(j=0; nextaj<anzi; j++) { 4663 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4664 ba_i[j] += aa[nextaj++]; 4665 } 4666 } 4667 nextrow[k]++; nextai[k]++; 4668 } 4669 } 4670 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4671 } 4672 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4673 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4674 4675 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4676 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4677 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4678 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4679 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4680 PetscFunctionReturn(0); 4681 } 4682 4683 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4684 { 4685 PetscErrorCode ierr; 4686 Mat B_mpi; 4687 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4688 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4689 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4690 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4691 PetscInt len,proc,*dnz,*onz,bs,cbs; 4692 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4693 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4694 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4695 MPI_Status *status; 4696 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4697 PetscBT lnkbt; 4698 Mat_Merge_SeqsToMPI *merge; 4699 PetscContainer container; 4700 4701 PetscFunctionBegin; 4702 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4703 4704 /* make sure it is a PETSc comm */ 4705 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4706 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4707 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4708 4709 ierr = PetscNew(&merge);CHKERRQ(ierr); 4710 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4711 4712 /* determine row ownership */ 4713 /*---------------------------------------------------------*/ 4714 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4715 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4716 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4717 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4718 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4719 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4720 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4721 4722 m = merge->rowmap->n; 4723 owners = merge->rowmap->range; 4724 4725 /* determine the number of messages to send, their lengths */ 4726 /*---------------------------------------------------------*/ 4727 len_s = merge->len_s; 4728 4729 len = 0; /* length of buf_si[] */ 4730 merge->nsend = 0; 4731 for (proc=0; proc<size; proc++) { 4732 len_si[proc] = 0; 4733 if (proc == rank) { 4734 len_s[proc] = 0; 4735 } else { 4736 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4737 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4738 } 4739 if (len_s[proc]) { 4740 merge->nsend++; 4741 nrows = 0; 4742 for (i=owners[proc]; i<owners[proc+1]; i++) { 4743 if (ai[i+1] > ai[i]) nrows++; 4744 } 4745 len_si[proc] = 2*(nrows+1); 4746 len += len_si[proc]; 4747 } 4748 } 4749 4750 /* determine the number and length of messages to receive for ij-structure */ 4751 /*-------------------------------------------------------------------------*/ 4752 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 
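  /* for each of the merge->nrecv incoming messages, obtain the sending rank (merge->id_r) along with the lengths of its j-structure (merge->len_r) and its i-structure (len_ri) */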
4753 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4754 4755 /* post the Irecv of j-structure */ 4756 /*-------------------------------*/ 4757 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4758 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4759 4760 /* post the Isend of j-structure */ 4761 /*--------------------------------*/ 4762 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4763 4764 for (proc=0, k=0; proc<size; proc++) { 4765 if (!len_s[proc]) continue; 4766 i = owners[proc]; 4767 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4768 k++; 4769 } 4770 4771 /* receives and sends of j-structure are complete */ 4772 /*------------------------------------------------*/ 4773 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4774 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4775 4776 /* send and recv i-structure */ 4777 /*---------------------------*/ 4778 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4779 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4780 4781 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4782 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4783 for (proc=0,k=0; proc<size; proc++) { 4784 if (!len_s[proc]) continue; 4785 /* form outgoing message for i-structure: 4786 buf_si[0]: nrows to be sent 4787 [1:nrows]: row index (global) 4788 [nrows+1:2*nrows+1]: i-structure index 4789 */ 4790 /*-------------------------------------------*/ 4791 nrows = len_si[proc]/2 - 1; 4792 buf_si_i = buf_si + nrows+1; 4793 buf_si[0] = nrows; 4794 buf_si_i[0] = 0; 4795 nrows = 0; 4796 for (i=owners[proc]; i<owners[proc+1]; i++) { 4797 anzi = ai[i+1] - ai[i]; 4798 if (anzi) { 4799 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4800 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4801 nrows++; 4802 } 4803 } 4804 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4805 k++; 4806 buf_si += len_si[proc]; 4807 } 4808 4809 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4810 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4811 4812 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4813 for (i=0; i<merge->nrecv; i++) { 4814 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4815 } 4816 4817 ierr = PetscFree(len_si);CHKERRQ(ierr); 4818 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4819 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4820 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4821 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4822 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4823 ierr = PetscFree(status);CHKERRQ(ierr); 4824 4825 /* compute a local seq matrix in each processor */ 4826 /*----------------------------------------------*/ 4827 /* allocate bi array and free space for accumulating nonzero column info */ 4828 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4829 bi[0] = 0; 4830 4831 /* create and initialize a linked list */ 4832 nlnk = N+1; 4833 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4834 4835 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4836 len = 
ai[owners[rank+1]] - ai[owners[rank]]; 4837 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4838 4839 current_space = free_space; 4840 4841 /* determine symbolic info for each local row */ 4842 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4843 4844 for (k=0; k<merge->nrecv; k++) { 4845 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4846 nrows = *buf_ri_k[k]; 4847 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4848 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4849 } 4850 4851 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4852 len = 0; 4853 for (i=0; i<m; i++) { 4854 bnzi = 0; 4855 /* add local non-zero cols of this proc's seqmat into lnk */ 4856 arow = owners[rank] + i; 4857 anzi = ai[arow+1] - ai[arow]; 4858 aj = a->j + ai[arow]; 4859 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4860 bnzi += nlnk; 4861 /* add received col data into lnk */ 4862 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4863 if (i == *nextrow[k]) { /* i-th row */ 4864 anzi = *(nextai[k]+1) - *nextai[k]; 4865 aj = buf_rj[k] + *nextai[k]; 4866 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4867 bnzi += nlnk; 4868 nextrow[k]++; nextai[k]++; 4869 } 4870 } 4871 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4872 4873 /* if free space is not available, make more free space */ 4874 if (current_space->local_remaining<bnzi) { 4875 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4876 nspacedouble++; 4877 } 4878 /* copy data into free space, then initialize lnk */ 4879 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4880 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4881 4882 current_space->array += bnzi; 4883 current_space->local_used += bnzi; 4884 current_space->local_remaining -= bnzi; 4885 4886 bi[i+1] = bi[i] + bnzi; 4887 } 4888 4889 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4890 4891 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4892 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4893 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4894 4895 /* create symbolic parallel matrix B_mpi */ 4896 /*---------------------------------------*/ 4897 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4898 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4899 if (n==PETSC_DECIDE) { 4900 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4901 } else { 4902 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4903 } 4904 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4905 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4906 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4907 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4908 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4909 4910 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4911 B_mpi->assembled = PETSC_FALSE; 4912 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4913 merge->bi = bi; 4914 merge->bj = bj; 4915 merge->buf_ri = buf_ri; 4916 merge->buf_rj = buf_rj; 4917 merge->coi = NULL; 4918 merge->coj = NULL; 4919 merge->owners_co = NULL; 4920 4921 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4922 4923 
/* attach the supporting struct to B_mpi for reuse */ 4924 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4925 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4926 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4927 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4928 *mpimat = B_mpi; 4929 4930 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4931 PetscFunctionReturn(0); 4932 } 4933 4934 /*@C 4935 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4936 matrices from each processor 4937 4938 Collective on MPI_Comm 4939 4940 Input Parameters: 4941 + comm - the communicators the parallel matrix will live on 4942 . seqmat - the input sequential matrices 4943 . m - number of local rows (or PETSC_DECIDE) 4944 . n - number of local columns (or PETSC_DECIDE) 4945 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4946 4947 Output Parameter: 4948 . mpimat - the parallel matrix generated 4949 4950 Level: advanced 4951 4952 Notes: 4953 The dimensions of the sequential matrix in each processor MUST be the same. 4954 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4955 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4956 @*/ 4957 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4958 { 4959 PetscErrorCode ierr; 4960 PetscMPIInt size; 4961 4962 PetscFunctionBegin; 4963 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4964 if (size == 1) { 4965 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4966 if (scall == MAT_INITIAL_MATRIX) { 4967 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4968 } else { 4969 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4970 } 4971 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4972 PetscFunctionReturn(0); 4973 } 4974 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4975 if (scall == MAT_INITIAL_MATRIX) { 4976 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4977 } 4978 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4979 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4980 PetscFunctionReturn(0); 4981 } 4982 4983 /*@ 4984 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4985 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4986 with MatGetSize() 4987 4988 Not Collective 4989 4990 Input Parameters: 4991 + A - the matrix 4992 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4993 4994 Output Parameter: 4995 . 
A_loc - the local sequential matrix generated 4996 4997 Level: developer 4998 4999 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5000 5001 @*/ 5002 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5003 { 5004 PetscErrorCode ierr; 5005 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5006 Mat_SeqAIJ *mat,*a,*b; 5007 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5008 MatScalar *aa,*ba,*cam; 5009 PetscScalar *ca; 5010 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5011 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5012 PetscBool match; 5013 MPI_Comm comm; 5014 PetscMPIInt size; 5015 5016 PetscFunctionBegin; 5017 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5018 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5019 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5020 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5021 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5022 5023 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5024 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5025 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5026 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5027 aa = a->a; ba = b->a; 5028 if (scall == MAT_INITIAL_MATRIX) { 5029 if (size == 1) { 5030 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5031 PetscFunctionReturn(0); 5032 } 5033 5034 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5035 ci[0] = 0; 5036 for (i=0; i<am; i++) { 5037 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5038 } 5039 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5040 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5041 k = 0; 5042 for (i=0; i<am; i++) { 5043 ncols_o = bi[i+1] - bi[i]; 5044 ncols_d = ai[i+1] - ai[i]; 5045 /* off-diagonal portion of A */ 5046 for (jo=0; jo<ncols_o; jo++) { 5047 col = cmap[*bj]; 5048 if (col >= cstart) break; 5049 cj[k] = col; bj++; 5050 ca[k++] = *ba++; 5051 } 5052 /* diagonal portion of A */ 5053 for (j=0; j<ncols_d; j++) { 5054 cj[k] = cstart + *aj++; 5055 ca[k++] = *aa++; 5056 } 5057 /* off-diagonal portion of A */ 5058 for (j=jo; j<ncols_o; j++) { 5059 cj[k] = cmap[*bj++]; 5060 ca[k++] = *ba++; 5061 } 5062 } 5063 /* put together the new matrix */ 5064 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5065 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5066 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5067 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5068 mat->free_a = PETSC_TRUE; 5069 mat->free_ij = PETSC_TRUE; 5070 mat->nonew = 0; 5071 } else if (scall == MAT_REUSE_MATRIX) { 5072 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5073 ci = mat->i; cj = mat->j; cam = mat->a; 5074 for (i=0; i<am; i++) { 5075 /* off-diagonal portion of A */ 5076 ncols_o = bi[i+1] - bi[i]; 5077 for (jo=0; jo<ncols_o; jo++) { 5078 col = cmap[*bj]; 5079 if (col >= cstart) break; 5080 *cam++ = *ba++; bj++; 5081 } 5082 /* diagonal portion of A */ 5083 ncols_d = ai[i+1] - ai[i]; 5084 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5085 /* off-diagonal portion of A */ 5086 for (j=jo; j<ncols_o; j++) { 5087 *cam++ = *ba++; bj++; 5088 } 5089 } 5090 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5091 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5092 PetscFunctionReturn(0); 5093 } 5094 5095 /*@C 5096 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5097 5098 Not Collective 5099 5100 Input Parameters: 5101 + A - the matrix 5102 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5103 - row, col - index sets of rows and columns to extract (or NULL) 5104 5105 Output Parameter: 5106 . A_loc - the local sequential matrix generated 5107 5108 Level: developer 5109 5110 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5111 5112 @*/ 5113 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5114 { 5115 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5116 PetscErrorCode ierr; 5117 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5118 IS isrowa,iscola; 5119 Mat *aloc; 5120 PetscBool match; 5121 5122 PetscFunctionBegin; 5123 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5124 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5125 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5126 if (!row) { 5127 start = A->rmap->rstart; end = A->rmap->rend; 5128 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5129 } else { 5130 isrowa = *row; 5131 } 5132 if (!col) { 5133 start = A->cmap->rstart; 5134 cmap = a->garray; 5135 nzA = a->A->cmap->n; 5136 nzB = a->B->cmap->n; 5137 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5138 ncols = 0; 5139 for (i=0; i<nzB; i++) { 5140 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5141 else break; 5142 } 5143 imark = i; 5144 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5145 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5146 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5147 } else { 5148 iscola = *col; 5149 } 5150 if (scall != MAT_INITIAL_MATRIX) { 5151 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5152 aloc[0] = *A_loc; 5153 } 5154 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5155 if (!col) { /* attach global id of condensed columns */ 5156 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5157 } 5158 *A_loc = aloc[0]; 5159 ierr = PetscFree(aloc);CHKERRQ(ierr); 5160 if (!row) { 5161 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5162 } 5163 if (!col) { 5164 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5165 } 5166 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5167 PetscFunctionReturn(0); 5168 } 5169 5170 /*@C 5171 MatGetBrowsOfAcols - Creates a 
SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5172 5173 Collective on Mat 5174 5175 Input Parameters: 5176 + A,B - the matrices in mpiaij format 5177 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5178 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5179 5180 Output Parameter: 5181 + rowb, colb - index sets of rows and columns of B to extract 5182 - B_seq - the sequential matrix generated 5183 5184 Level: developer 5185 5186 @*/ 5187 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5188 { 5189 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5190 PetscErrorCode ierr; 5191 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5192 IS isrowb,iscolb; 5193 Mat *bseq=NULL; 5194 5195 PetscFunctionBegin; 5196 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5197 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5198 } 5199 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5200 5201 if (scall == MAT_INITIAL_MATRIX) { 5202 start = A->cmap->rstart; 5203 cmap = a->garray; 5204 nzA = a->A->cmap->n; 5205 nzB = a->B->cmap->n; 5206 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5207 ncols = 0; 5208 for (i=0; i<nzB; i++) { /* row < local row index */ 5209 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5210 else break; 5211 } 5212 imark = i; 5213 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5214 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5215 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5216 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5217 } else { 5218 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5219 isrowb = *rowb; iscolb = *colb; 5220 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5221 bseq[0] = *B_seq; 5222 } 5223 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5224 *B_seq = bseq[0]; 5225 ierr = PetscFree(bseq);CHKERRQ(ierr); 5226 if (!rowb) { 5227 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5228 } else { 5229 *rowb = isrowb; 5230 } 5231 if (!colb) { 5232 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5233 } else { 5234 *colb = iscolb; 5235 } 5236 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5237 PetscFunctionReturn(0); 5238 } 5239 5240 #include <petsc/private/vecscatterimpl.h> 5241 /* 5242 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5243 of the OFF-DIAGONAL portion of local A 5244 5245 Collective on Mat 5246 5247 Input Parameters: 5248 + A,B - the matrices in mpiaij format 5249 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5250 5251 Output Parameter: 5252 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5253 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5254 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5255 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5256 5257 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5258 for this matrix. This is not desirable.. 
5259 5260 Level: developer 5261 5262 */ 5263 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5264 { 5265 VecScatter_MPI_General *gen_to,*gen_from; 5266 PetscErrorCode ierr; 5267 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5268 Mat_SeqAIJ *b_oth; 5269 VecScatter ctx; 5270 MPI_Comm comm; 5271 PetscMPIInt *rprocs,*sprocs,tag,rank; 5272 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5273 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5274 PetscScalar *b_otha,*bufa,*bufA,*vals; 5275 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5276 MPI_Request *rwaits = NULL,*swaits = NULL; 5277 MPI_Status *sstatus,rstatus; 5278 PetscMPIInt jj,size; 5279 VecScatterType type; 5280 PetscBool mpi1; 5281 5282 PetscFunctionBegin; 5283 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5284 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5285 5286 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5287 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5288 } 5289 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5290 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5291 5292 if (size == 1) { 5293 startsj_s = NULL; 5294 bufa_ptr = NULL; 5295 *B_oth = NULL; 5296 PetscFunctionReturn(0); 5297 } 5298 5299 ctx = a->Mvctx; 5300 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5301 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5302 if (!mpi1) { 5303 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5304 thus create a->Mvctx_mpi1 */ 5305 if (!a->Mvctx_mpi1) { 5306 a->Mvctx_mpi1_flg = PETSC_TRUE; 5307 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5308 } 5309 ctx = a->Mvctx_mpi1; 5310 } 5311 tag = ((PetscObject)ctx)->tag; 5312 5313 gen_to = (VecScatter_MPI_General*)ctx->todata; 5314 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5315 nrecvs = gen_from->n; 5316 nsends = gen_to->n; 5317 5318 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5319 srow = gen_to->indices; /* local row index to be sent */ 5320 sstarts = gen_to->starts; 5321 sprocs = gen_to->procs; 5322 sstatus = gen_to->sstatus; 5323 sbs = gen_to->bs; 5324 rstarts = gen_from->starts; 5325 rprocs = gen_from->procs; 5326 rbs = gen_from->bs; 5327 5328 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5329 if (scall == MAT_INITIAL_MATRIX) { 5330 /* i-array */ 5331 /*---------*/ 5332 /* post receives */ 5333 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5334 for (i=0; i<nrecvs; i++) { 5335 rowlen = rvalues + rstarts[i]*rbs; 5336 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5337 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5338 } 5339 5340 /* pack the outgoing message */ 5341 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5342 5343 sstartsj[0] = 0; 5344 rstartsj[0] = 0; 5345 len = 0; /* total length of j or a array to be sent */ 5346 k = 0; 5347 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5348 for (i=0; i<nsends; i++) { 5349 rowlen = svalues + sstarts[i]*sbs; 5350 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5351 for (j=0; j<nrows; j++) { 5352 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5353 for 
(l=0; l<sbs; l++) { 5354 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5355 5356 rowlen[j*sbs+l] = ncols; 5357 5358 len += ncols; 5359 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5360 } 5361 k++; 5362 } 5363 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5364 5365 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5366 } 5367 /* recvs and sends of i-array are completed */ 5368 i = nrecvs; 5369 while (i--) { 5370 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5371 } 5372 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5373 ierr = PetscFree(svalues);CHKERRQ(ierr); 5374 5375 /* allocate buffers for sending j and a arrays */ 5376 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5377 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5378 5379 /* create i-array of B_oth */ 5380 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5381 5382 b_othi[0] = 0; 5383 len = 0; /* total length of j or a array to be received */ 5384 k = 0; 5385 for (i=0; i<nrecvs; i++) { 5386 rowlen = rvalues + rstarts[i]*rbs; 5387 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5388 for (j=0; j<nrows; j++) { 5389 b_othi[k+1] = b_othi[k] + rowlen[j]; 5390 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5391 k++; 5392 } 5393 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5394 } 5395 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5396 5397 /* allocate space for j and a arrrays of B_oth */ 5398 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5399 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5400 5401 /* j-array */ 5402 /*---------*/ 5403 /* post receives of j-array */ 5404 for (i=0; i<nrecvs; i++) { 5405 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5406 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5407 } 5408 5409 /* pack the outgoing message j-array */ 5410 k = 0; 5411 for (i=0; i<nsends; i++) { 5412 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5413 bufJ = bufj+sstartsj[i]; 5414 for (j=0; j<nrows; j++) { 5415 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5416 for (ll=0; ll<sbs; ll++) { 5417 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5418 for (l=0; l<ncols; l++) { 5419 *bufJ++ = cols[l]; 5420 } 5421 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5422 } 5423 } 5424 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5425 } 5426 5427 /* recvs and sends of j-array are completed */ 5428 i = nrecvs; 5429 while (i--) { 5430 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5431 } 5432 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5433 } else if (scall == MAT_REUSE_MATRIX) { 5434 sstartsj = *startsj_s; 5435 rstartsj = *startsj_r; 5436 bufa = *bufa_ptr; 5437 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5438 b_otha = b_oth->a; 5439 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5440 5441 /* a-array */ 5442 /*---------*/ 5443 /* post receives of a-array */ 5444 for (i=0; i<nrecvs; i++) { 5445 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5446 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5447 } 5448 5449 /* pack the 
outgoing message a-array */ 5450 k = 0; 5451 for (i=0; i<nsends; i++) { 5452 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5453 bufA = bufa+sstartsj[i]; 5454 for (j=0; j<nrows; j++) { 5455 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5456 for (ll=0; ll<sbs; ll++) { 5457 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5458 for (l=0; l<ncols; l++) { 5459 *bufA++ = vals[l]; 5460 } 5461 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5462 } 5463 } 5464 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5465 } 5466 /* recvs and sends of a-array are completed */ 5467 i = nrecvs; 5468 while (i--) { 5469 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5470 } 5471 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5472 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5473 5474 if (scall == MAT_INITIAL_MATRIX) { 5475 /* put together the new matrix */ 5476 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5477 5478 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5479 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5480 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5481 b_oth->free_a = PETSC_TRUE; 5482 b_oth->free_ij = PETSC_TRUE; 5483 b_oth->nonew = 0; 5484 5485 ierr = PetscFree(bufj);CHKERRQ(ierr); 5486 if (!startsj_s || !bufa_ptr) { 5487 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5488 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5489 } else { 5490 *startsj_s = sstartsj; 5491 *startsj_r = rstartsj; 5492 *bufa_ptr = bufa; 5493 } 5494 } 5495 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5496 PetscFunctionReturn(0); 5497 } 5498 5499 /*@C 5500 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5501 5502 Not Collective 5503 5504 Input Parameters: 5505 . A - The matrix in mpiaij format 5506 5507 Output Parameter: 5508 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5509 . 
colmap - A map from global column index to local index into lvec 5510 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5511 5512 Level: developer 5513 5514 @*/ 5515 #if defined(PETSC_USE_CTABLE) 5516 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5517 #else 5518 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5519 #endif 5520 { 5521 Mat_MPIAIJ *a; 5522 5523 PetscFunctionBegin; 5524 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5525 PetscValidPointer(lvec, 2); 5526 PetscValidPointer(colmap, 3); 5527 PetscValidPointer(multScatter, 4); 5528 a = (Mat_MPIAIJ*) A->data; 5529 if (lvec) *lvec = a->lvec; 5530 if (colmap) *colmap = a->colmap; 5531 if (multScatter) *multScatter = a->Mvctx; 5532 PetscFunctionReturn(0); 5533 } 5534 5535 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5536 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5537 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5538 #if defined(PETSC_HAVE_MKL_SPARSE) 5539 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5540 #endif 5541 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5542 #if defined(PETSC_HAVE_ELEMENTAL) 5543 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5544 #endif 5545 #if defined(PETSC_HAVE_HYPRE) 5546 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5547 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5548 #endif 5549 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5550 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5551 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5552 5553 /* 5554 Computes (B'*A')' since computing B*A directly is untenable 5555 5556 n p p 5557 ( ) ( ) ( ) 5558 m ( A ) * n ( B ) = m ( C ) 5559 ( ) ( ) ( ) 5560 5561 */ 5562 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5563 { 5564 PetscErrorCode ierr; 5565 Mat At,Bt,Ct; 5566 5567 PetscFunctionBegin; 5568 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5569 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5570 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5571 ierr = MatDestroy(&At);CHKERRQ(ierr); 5572 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5573 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5574 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5575 PetscFunctionReturn(0); 5576 } 5577 5578 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5579 { 5580 PetscErrorCode ierr; 5581 PetscInt m=A->rmap->n,n=B->cmap->n; 5582 Mat Cmat; 5583 5584 PetscFunctionBegin; 5585 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5586 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5587 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5588 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5589 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5590 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5591 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5592 ierr = 
MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5593 5594 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5595 5596 *C = Cmat; 5597 PetscFunctionReturn(0); 5598 } 5599 5600 /* ----------------------------------------------------------------*/ 5601 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5602 { 5603 PetscErrorCode ierr; 5604 5605 PetscFunctionBegin; 5606 if (scall == MAT_INITIAL_MATRIX) { 5607 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5608 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5609 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5610 } 5611 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5612 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5613 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5614 PetscFunctionReturn(0); 5615 } 5616 5617 /*MC 5618 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5619 5620 Options Database Keys: 5621 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5622 5623 Level: beginner 5624 5625 .seealso: MatCreateAIJ() 5626 M*/ 5627 5628 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5629 { 5630 Mat_MPIAIJ *b; 5631 PetscErrorCode ierr; 5632 PetscMPIInt size; 5633 5634 PetscFunctionBegin; 5635 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5636 5637 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5638 B->data = (void*)b; 5639 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5640 B->assembled = PETSC_FALSE; 5641 B->insertmode = NOT_SET_VALUES; 5642 b->size = size; 5643 5644 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5645 5646 /* build cache for off array entries formed */ 5647 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5648 5649 b->donotstash = PETSC_FALSE; 5650 b->colmap = 0; 5651 b->garray = 0; 5652 b->roworiented = PETSC_TRUE; 5653 5654 /* stuff used for matrix vector multiply */ 5655 b->lvec = NULL; 5656 b->Mvctx = NULL; 5657 5658 /* stuff for MatGetRow() */ 5659 b->rowindices = 0; 5660 b->rowvalues = 0; 5661 b->getrowactive = PETSC_FALSE; 5662 5663 /* flexible pointer used in CUSP/CUSPARSE classes */ 5664 b->spptr = NULL; 5665 5666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5668 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5672 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5674 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 5676 #if defined(PETSC_HAVE_MKL_SPARSE) 5677 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5678 #endif 5679 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5680 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5681 #if defined(PETSC_HAVE_ELEMENTAL) 5682 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5683 #endif 5684 #if defined(PETSC_HAVE_HYPRE) 5685 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5686 #endif 5687 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5688 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5689 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5690 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5691 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5692 #if defined(PETSC_HAVE_HYPRE) 5693 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5694 #endif 5695 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5696 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5697 PetscFunctionReturn(0); 5698 } 5699 5700 /*@C 5701 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5702 and "off-diagonal" part of the matrix in CSR format. 5703 5704 Collective on MPI_Comm 5705 5706 Input Parameters: 5707 + comm - MPI communicator 5708 . m - number of local rows (Cannot be PETSC_DECIDE) 5709 . n - This value should be the same as the local size used in creating the 5710 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5711 calculated if N is given) For square matrices n is almost always m. 5712 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5713 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5714 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 5715 . j - column indices 5716 . a - matrix values 5717 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 5718 . oj - column indices 5719 - oa - matrix values 5720 5721 Output Parameter: 5722 . mat - the matrix 5723 5724 Level: advanced 5725 5726 Notes: 5727 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5728 must free the arrays once the matrix has been destroyed and not before. 
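   As an illustrative sketch only (this particular layout is not taken from an existing example), consider a
   4x4 matrix distributed over two processes, each owning 2 rows and 2 columns:
.vb
            1 2 | 0 3
   Proc0    0 4 | 5 0
            ---------
   Proc1    6 0 | 7 0
            0 8 | 0 9
.ve
   With the column indices in j numbered locally within the diagonal block and those in oj given as global
   column indices (consistent with the sizes of the two sequential blocks built from them), the input arrays are
.vb
   Proc0:   i  = {0,2,3}   j  = {0,1,1}   a  = {1,2,4}
            oi = {0,1,2}   oj = {3,2}     oa = {3,5}
   Proc1:   i  = {0,1,2}   j  = {0,1}     a  = {7,9}
            oi = {0,1,2}   oj = {0,1}     oa = {6,8}
.ve
   and each process then calls MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&mat),
   keeping all six arrays allocated for the lifetime of the matrix.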
5729 5730 The i and j indices are 0 based 5731 5732 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5733 5734 This sets local rows and cannot be used to set off-processor values. 5735 5736 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5737 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5738 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5739 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5740 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5741 communication if it is known that only local entries will be set. 5742 5743 .keywords: matrix, aij, compressed row, sparse, parallel 5744 5745 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5746 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5747 @*/ 5748 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5749 { 5750 PetscErrorCode ierr; 5751 Mat_MPIAIJ *maij; 5752 5753 PetscFunctionBegin; 5754 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5755 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5756 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5757 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5758 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5759 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5760 maij = (Mat_MPIAIJ*) (*mat)->data; 5761 5762 (*mat)->preallocated = PETSC_TRUE; 5763 5764 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5765 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5766 5767 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5768 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5769 5770 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5771 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5772 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5773 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5774 5775 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5776 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5777 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5778 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5779 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5780 PetscFunctionReturn(0); 5781 } 5782 5783 /* 5784 Special version for direct calls from Fortran 5785 */ 5786 #include <petsc/private/fortranimpl.h> 5787 5788 /* Change these macros so can be used in void function */ 5789 #undef CHKERRQ 5790 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5791 #undef SETERRQ2 5792 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5793 #undef SETERRQ3 5794 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5795 #undef SETERRQ 5796 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5797 
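/* select the Fortran-callable symbol name according to the compiler's name-mangling convention: all caps, no trailing underscore, or the default lower case with a trailing underscore */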
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macros */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   note that ba must be refreshed from the rebuilt B before computing ap2 */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
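
/*
   The following is an illustrative sketch only; it is NOT part of the PETSc API and the function
   name is hypothetical. It shows how the split arrays expected by MatCreateMPIAIJWithSplitArrays()
   might be built by hand for the 4x4 tridiagonal matrix

      [  2 -1  0  0 ]
      [ -1  2 -1  0 ]
      [  0 -1  2 -1 ]
      [  0  0 -1  2 ]

   distributed over exactly two MPI ranks with two rows each. The diagonal-block arrays (i,j,a) use
   0-based local column indices, while the off-diagonal-block arrays (oi,oj,oa) use 0-based global
   column indices, matching the two MatCreateSeqAIJWithArrays() calls in the routine above. The
   arrays are not copied, so the matrix is destroyed before they go out of scope. Note that at this
   point in the file CHKERRQ() and SETERRQ() have been redefined to abort on error.
*/
PetscErrorCode ExampleCreateTridiagWithSplitArrays_2Ranks(MPI_Comm comm)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  Mat            A;
  /* diagonal block: identical 2x2 CSR data on both ranks, local column numbering */
  PetscInt       i[]   = {0,2,4};
  PetscInt       j[]   = {0,1,0,1};
  PetscScalar    a[]   = {2.0,-1.0,-1.0,2.0};
  /* off-diagonal block: one coupling entry per rank, global column numbering */
  PetscInt       oi0[] = {0,0,1},oj0[] = {2};   /* rank 0: global row 1 couples to global column 2 */
  PetscInt       oi1[] = {0,1,1},oj1[] = {1};   /* rank 1: global row 2 couples to global column 1 */
  PetscScalar    oa[]  = {-1.0};

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (size != 2) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"This sketch assumes exactly two MPI ranks");
  ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,4,4,i,j,a,rank ? oi1 : oi0,rank ? oj1 : oj0,oa,&A);CHKERRQ(ierr);
  ierr = MatView(A,PETSC_VIEWER_STDOUT_(comm));CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);  /* destroy while the stack arrays are still alive */
  PetscFunctionReturn(0);
}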
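
/*
   A second illustrative sketch (again not part of PETSc, with a hypothetical function name) of the
   assembly style recommended in the manual page for MatCreateMPIAIJWithSplitArrays(): create a
   MATAIJ matrix, preallocate, declare via MAT_NO_OFF_PROC_ENTRIES that only locally owned rows are
   set, and let MatSetValues() route each entry into the diagonal or off-diagonal block
   automatically. The 1D tridiagonal stencil is used purely as an example; m is the local number of
   rows (or PETSC_DECIDE) and N is the global number of rows.
*/
PetscErrorCode ExampleAssembleTridiagWithMatSetValues(MPI_Comm comm,PetscInt m,PetscInt N,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscInt       row,rstart,rend,col;
  PetscScalar    v;

  PetscFunctionBegin;
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,m,N,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,MATAIJ);CHKERRQ(ierr);
  /* call both preallocation routines so the code works for one or many processes */
  ierr = MatSeqAIJSetPreallocation(*newmat,3,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*newmat,3,NULL,1,NULL);CHKERRQ(ierr);
  /* every process sets only its own rows, so stash communication can be skipped entirely */
  ierr = MatSetOption(*newmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*newmat,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    v = -1.0;
    if (row > 0)   {col = row-1; ierr = MatSetValues(*newmat,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);}
    if (row < N-1) {col = row+1; ierr = MatSetValues(*newmat,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);}
    v    = 2.0;
    ierr = MatSetValues(*newmat,1,&row,1,&row,&v,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}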