1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/isimpl.h> 6 #include <petscblaslapack.h> 7 #include <petscsf.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 22 enough exist. 23 24 Level: beginner 25 26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 27 M*/ 28 29 /*MC 30 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 31 32 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 33 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 34 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 35 for communicators controlling multiple processes. It is recommended that you call both of 36 the above preallocation routines for simplicity. 37 38 Options Database Keys: 39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 40 41 Level: beginner 42 43 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 44 M*/ 45 46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 51 PetscFunctionBegin; 52 if (mat->A) { 53 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 54 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 55 } 56 PetscFunctionReturn(0); 57 } 58 59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 60 { 61 PetscErrorCode ierr; 62 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 63 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 64 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 65 const PetscInt *ia,*ib; 66 const MatScalar *aa,*bb; 67 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 68 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 69 70 PetscFunctionBegin; 71 *keptrows = 0; 72 ia = a->i; 73 ib = b->i; 74 for (i=0; i<m; i++) { 75 na = ia[i+1] - ia[i]; 76 nb = ib[i+1] - ib[i]; 77 if (!na && !nb) { 78 cnt++; 79 goto ok1; 80 } 81 aa = a->a + ia[i]; 82 for (j=0; j<na; j++) { 83 if (aa[j] != 0.0) goto ok1; 84 } 85 bb = b->a + ib[i]; 86 for (j=0; j <nb; j++) { 87 if (bb[j] != 0.0) goto ok1; 88 } 89 cnt++; 90 ok1:; 91 } 92 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 93 if (!n0rows) PetscFunctionReturn(0); 94 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 95 cnt = 0; 96 for (i=0; i<m; i++) { 97 na = ia[i+1] - ia[i]; 98 nb = ib[i+1] - ib[i]; 99 if (!na && !nb) continue; 100 aa = a->a + ia[i]; 101 for (j=0; j<na;j++) { 102 if (aa[j] != 0.0) { 103 rows[cnt++] = rstart + i; 104 goto ok2; 105 } 106 } 107 bb = b->a + ib[i]; 108 for (j=0; j<nb; j++) { 109 if (bb[j] != 0.0) { 110 rows[cnt++] = rstart + i; 111 goto ok2; 112 } 113 
} 114 ok2:; 115 } 116 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 117 PetscFunctionReturn(0); 118 } 119 120 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 121 { 122 PetscErrorCode ierr; 123 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 124 125 PetscFunctionBegin; 126 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 127 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 128 } else { 129 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 130 } 131 PetscFunctionReturn(0); 132 } 133 134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 135 { 136 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 137 PetscErrorCode ierr; 138 PetscInt i,rstart,nrows,*rows; 139 140 PetscFunctionBegin; 141 *zrows = NULL; 142 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 143 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 144 for (i=0; i<nrows; i++) rows[i] += rstart; 145 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 150 { 151 PetscErrorCode ierr; 152 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 153 PetscInt i,n,*garray = aij->garray; 154 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 155 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 156 PetscReal *work; 157 158 PetscFunctionBegin; 159 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 160 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 161 if (type == NORM_2) { 162 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 163 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 164 } 165 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 166 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 167 } 168 } else if (type == NORM_1) { 169 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 170 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 171 } 172 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 173 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 174 } 175 } else if (type == NORM_INFINITY) { 176 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 177 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 178 } 179 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 180 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 181 } 182 183 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 184 if (type == NORM_INFINITY) { 185 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 186 } else { 187 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 188 } 189 ierr = PetscFree(work);CHKERRQ(ierr); 190 if (type == NORM_2) { 191 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 192 } 193 PetscFunctionReturn(0); 194 } 195 196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 197 { 198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 199 IS sis,gis; 200 PetscErrorCode ierr; 201 const PetscInt *isis,*igis; 202 PetscInt n,*iis,nsis,ngis,rstart,i; 203 204 PetscFunctionBegin; 205 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 206 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 207 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 208 ierr = 
ISGetSize(sis,&nsis);CHKERRQ(ierr); 209 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 210 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 211 212 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 213 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 214 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 215 n = ngis + nsis; 216 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 217 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 218 for (i=0; i<n; i++) iis[i] += rstart; 219 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 220 221 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 222 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 223 ierr = ISDestroy(&sis);CHKERRQ(ierr); 224 ierr = ISDestroy(&gis);CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 228 /* 229 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 230 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 231 232 Only for square matrices 233 234 Used by a preconditioner, hence PETSC_EXTERN 235 */ 236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 237 { 238 PetscMPIInt rank,size; 239 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 240 PetscErrorCode ierr; 241 Mat mat; 242 Mat_SeqAIJ *gmata; 243 PetscMPIInt tag; 244 MPI_Status status; 245 PetscBool aij; 246 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 247 248 PetscFunctionBegin; 249 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 250 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 251 if (!rank) { 252 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 253 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 254 } 255 if (reuse == MAT_INITIAL_MATRIX) { 256 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 257 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 258 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 259 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 260 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 261 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 262 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 263 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 264 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 265 266 rowners[0] = 0; 267 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 268 rstart = rowners[rank]; 269 rend = rowners[rank+1]; 270 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 271 if (!rank) { 272 gmata = (Mat_SeqAIJ*) gmat->data; 273 /* send row lengths to all processors */ 274 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 275 for (i=1; i<size; i++) { 276 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 277 } 278 /* determine number diagonal and off-diagonal counts */ 279 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 280 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 281 jj = 0; 282 for (i=0; i<m; i++) { 283 for (j=0; j<dlens[i]; j++) { 284 if (gmata->j[jj] < rstart) ld[i]++; 285 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 286 jj++; 287 } 288 } 289 /* send column indices to other processes */ 290 for (i=1; i<size; i++) { 291 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 292 ierr = 
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 294 } 295 296 /* send numerical values to other processes */ 297 for (i=1; i<size; i++) { 298 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 299 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 300 } 301 gmataa = gmata->a; 302 gmataj = gmata->j; 303 304 } else { 305 /* receive row lengths */ 306 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 307 /* receive column indices */ 308 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 309 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 310 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 311 /* determine number diagonal and off-diagonal counts */ 312 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 313 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 314 jj = 0; 315 for (i=0; i<m; i++) { 316 for (j=0; j<dlens[i]; j++) { 317 if (gmataj[jj] < rstart) ld[i]++; 318 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 319 jj++; 320 } 321 } 322 /* receive numerical values */ 323 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 324 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 325 } 326 /* set preallocation */ 327 for (i=0; i<m; i++) { 328 dlens[i] -= olens[i]; 329 } 330 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 331 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 332 333 for (i=0; i<m; i++) { 334 dlens[i] += olens[i]; 335 } 336 cnt = 0; 337 for (i=0; i<m; i++) { 338 row = rstart + i; 339 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 340 cnt += dlens[i]; 341 } 342 if (rank) { 343 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 344 } 345 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 346 ierr = PetscFree(rowners);CHKERRQ(ierr); 347 348 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 349 350 *inmat = mat; 351 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 352 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 353 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 354 mat = *inmat; 355 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 356 if (!rank) { 357 /* send numerical values to other processes */ 358 gmata = (Mat_SeqAIJ*) gmat->data; 359 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 360 gmataa = gmata->a; 361 for (i=1; i<size; i++) { 362 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 363 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 364 } 365 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 366 } else { 367 /* receive numerical values from process 0*/ 368 nz = Ad->nz + Ao->nz; 369 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 370 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 371 } 372 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 373 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 374 ad = Ad->a; 375 ao = Ao->a; 376 if (mat->rmap->n) { 377 i = 0; 378 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 379 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 380 } 381 for (i=1; i<mat->rmap->n; i++) { 
382 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 383 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 384 } 385 i--; 386 if (mat->rmap->n) { 387 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 388 } 389 if (rank) { 390 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 391 } 392 } 393 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 395 PetscFunctionReturn(0); 396 } 397 398 /* 399 Local utility routine that creates a mapping from the global column 400 number to the local number in the off-diagonal part of the local 401 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 402 a slightly higher hash table cost; without it it is not scalable (each processor 403 has an order N integer array but is fast to acess. 404 */ 405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 406 { 407 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 408 PetscErrorCode ierr; 409 PetscInt n = aij->B->cmap->n,i; 410 411 PetscFunctionBegin; 412 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 413 #if defined(PETSC_USE_CTABLE) 414 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 415 for (i=0; i<n; i++) { 416 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 417 } 418 #else 419 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 420 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 421 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 422 #endif 423 PetscFunctionReturn(0); 424 } 425 426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 427 { \ 428 if (col <= lastcol1) low1 = 0; \ 429 else high1 = nrow1; \ 430 lastcol1 = col;\ 431 while (high1-low1 > 5) { \ 432 t = (low1+high1)/2; \ 433 if (rp1[t] > col) high1 = t; \ 434 else low1 = t; \ 435 } \ 436 for (_i=low1; _i<high1; _i++) { \ 437 if (rp1[_i] > col) break; \ 438 if (rp1[_i] == col) { \ 439 if (addv == ADD_VALUES) ap1[_i] += value; \ 440 else ap1[_i] = value; \ 441 goto a_noinsert; \ 442 } \ 443 } \ 444 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 445 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 446 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 447 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 448 N = nrow1++ - 1; a->nz++; high1++; \ 449 /* shift up all the later entries in this row */ \ 450 for (ii=N; ii>=_i; ii--) { \ 451 rp1[ii+1] = rp1[ii]; \ 452 ap1[ii+1] = ap1[ii]; \ 453 } \ 454 rp1[_i] = col; \ 455 ap1[_i] = value; \ 456 A->nonzerostate++;\ 457 a_noinsert: ; \ 458 ailen[row] = nrow1; \ 459 } 460 461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 462 { \ 463 if (col <= lastcol2) low2 = 0; \ 464 else high2 = nrow2; \ 465 lastcol2 = col; \ 466 while (high2-low2 > 5) { \ 467 t = (low2+high2)/2; \ 468 if (rp2[t] > col) high2 = t; \ 469 else low2 = t; \ 470 } \ 471 for (_i=low2; _i<high2; _i++) { \ 472 if (rp2[_i] > col) break; \ 473 if (rp2[_i] == col) { \ 474 if (addv == ADD_VALUES) ap2[_i] += value; \ 475 else ap2[_i] 
= value; \ 476 goto b_noinsert; \ 477 } \ 478 } \ 479 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 480 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 481 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 482 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 483 N = nrow2++ - 1; b->nz++; high2++; \ 484 /* shift up all the later entries in this row */ \ 485 for (ii=N; ii>=_i; ii--) { \ 486 rp2[ii+1] = rp2[ii]; \ 487 ap2[ii+1] = ap2[ii]; \ 488 } \ 489 rp2[_i] = col; \ 490 ap2[_i] = value; \ 491 B->nonzerostate++; \ 492 b_noinsert: ; \ 493 bilen[row] = nrow2; \ 494 } 495 496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 497 { 498 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 499 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 500 PetscErrorCode ierr; 501 PetscInt l,*garray = mat->garray,diag; 502 503 PetscFunctionBegin; 504 /* code only works for square matrices A */ 505 506 /* find size of row to the left of the diagonal part */ 507 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 508 row = row - diag; 509 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 510 if (garray[b->j[b->i[row]+l]] > diag) break; 511 } 512 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 513 514 /* diagonal part */ 515 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* right of diagonal part */ 518 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 523 { 524 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 525 PetscScalar value; 526 PetscErrorCode ierr; 527 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 528 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 529 PetscBool roworiented = aij->roworiented; 530 531 /* Some Variables required in the macro */ 532 Mat A = aij->A; 533 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 534 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 535 MatScalar *aa = a->a; 536 PetscBool ignorezeroentries = a->ignorezeroentries; 537 Mat B = aij->B; 538 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 539 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 540 MatScalar *ba = b->a; 541 542 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 543 PetscInt nonew; 544 MatScalar *ap1,*ap2; 545 546 PetscFunctionBegin; 547 for (i=0; i<m; i++) { 548 if (im[i] < 0) continue; 549 #if defined(PETSC_USE_DEBUG) 550 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 551 #endif 552 if (im[i] >= rstart && im[i] < rend) { 553 row = im[i] - rstart; 554 lastcol1 = -1; 555 rp1 = aj + ai[row]; 556 ap1 = aa + ai[row]; 557 rmax1 = aimax[row]; 558 nrow1 = ailen[row]; 559 low1 = 0; 560 high1 = nrow1; 561 lastcol2 = -1; 562 rp2 = bj + bi[row]; 563 ap2 = ba + bi[row]; 564 rmax2 = bimax[row]; 565 nrow2 = bilen[row]; 566 low2 = 0; 567 high2 = nrow2; 568 569 for (j=0; j<n; j++) { 570 if (roworiented) value = v[i*n+j]; 571 else value = v[i+j*m]; 
572 if (in[j] >= cstart && in[j] < cend) { 573 col = in[j] - cstart; 574 nonew = a->nonew; 575 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 576 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 577 } else if (in[j] < 0) continue; 578 #if defined(PETSC_USE_DEBUG) 579 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 580 #endif 581 else { 582 if (mat->was_assembled) { 583 if (!aij->colmap) { 584 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 585 } 586 #if defined(PETSC_USE_CTABLE) 587 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 588 col--; 589 #else 590 col = aij->colmap[in[j]] - 1; 591 #endif 592 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 593 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 594 col = in[j]; 595 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 596 B = aij->B; 597 b = (Mat_SeqAIJ*)B->data; 598 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 599 rp2 = bj + bi[row]; 600 ap2 = ba + bi[row]; 601 rmax2 = bimax[row]; 602 nrow2 = bilen[row]; 603 low2 = 0; 604 high2 = nrow2; 605 bm = aij->B->rmap->n; 606 ba = b->a; 607 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 608 } else col = in[j]; 609 nonew = b->nonew; 610 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 611 } 612 } 613 } else { 614 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 615 if (!aij->donotstash) { 616 mat->assembled = PETSC_FALSE; 617 if (roworiented) { 618 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 619 } else { 620 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 621 } 622 } 623 } 624 } 625 PetscFunctionReturn(0); 626 } 627 628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 633 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 634 635 PetscFunctionBegin; 636 for (i=0; i<m; i++) { 637 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 638 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 639 if (idxm[i] >= rstart && idxm[i] < rend) { 640 row = idxm[i] - rstart; 641 for (j=0; j<n; j++) { 642 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 643 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 644 if (idxn[j] >= cstart && idxn[j] < cend) { 645 col = idxn[j] - cstart; 646 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 647 } else { 648 if (!aij->colmap) { 649 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 650 } 651 #if defined(PETSC_USE_CTABLE) 652 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 
653 col--; 654 #else 655 col = aij->colmap[idxn[j]] - 1; 656 #endif 657 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 658 else { 659 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 660 } 661 } 662 } 663 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 664 } 665 PetscFunctionReturn(0); 666 } 667 668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 669 670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 671 { 672 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 673 PetscErrorCode ierr; 674 PetscInt nstash,reallocs; 675 676 PetscFunctionBegin; 677 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 678 679 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 680 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 681 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 682 PetscFunctionReturn(0); 683 } 684 685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 686 { 687 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 688 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 689 PetscErrorCode ierr; 690 PetscMPIInt n; 691 PetscInt i,j,rstart,ncols,flg; 692 PetscInt *row,*col; 693 PetscBool other_disassembled; 694 PetscScalar *val; 695 696 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 697 698 PetscFunctionBegin; 699 if (!aij->donotstash && !mat->nooffprocentries) { 700 while (1) { 701 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 702 if (!flg) break; 703 704 for (i=0; i<n; ) { 705 /* Now identify the consecutive vals belonging to the same row */ 706 for (j=i,rstart=row[j]; j<n; j++) { 707 if (row[j] != rstart) break; 708 } 709 if (j < n) ncols = j-i; 710 else ncols = n-i; 711 /* Now assemble all these values with a single function call */ 712 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 713 714 i = j; 715 } 716 } 717 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 718 } 719 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 720 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 721 722 /* determine if any processor has disassembled, if so we must 723 also disassemble ourselfs, in order that we may reassemble. 
*/ 724 /* 725 if nonzero structure of submatrix B cannot change then we know that 726 no processor disassembled thus we can skip this stuff 727 */ 728 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 729 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 730 if (mat->was_assembled && !other_disassembled) { 731 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 732 } 733 } 734 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 735 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 736 } 737 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 738 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 739 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 740 741 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 742 743 aij->rowvalues = 0; 744 745 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 746 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 747 748 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 749 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 750 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 751 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 752 } 753 PetscFunctionReturn(0); 754 } 755 756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 757 { 758 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 759 PetscErrorCode ierr; 760 761 PetscFunctionBegin; 762 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 763 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 764 PetscFunctionReturn(0); 765 } 766 767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 768 { 769 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 770 PetscInt *lrows; 771 PetscInt r, len; 772 PetscErrorCode ierr; 773 774 PetscFunctionBegin; 775 /* get locally owned rows */ 776 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 777 /* fix right hand side if needed */ 778 if (x && b) { 779 const PetscScalar *xx; 780 PetscScalar *bb; 781 782 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 783 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 784 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 785 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 786 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 787 } 788 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 789 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 790 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 791 PetscBool cong; 792 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 793 if (cong) A->congruentlayouts = 1; 794 else A->congruentlayouts = 0; 795 } 796 if ((diag != 0.0) && A->congruentlayouts) { 797 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 798 } else if (diag != 0.0) { 799 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 800 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 801 for (r = 0; r < len; ++r) { 802 const PetscInt row = lrows[r] + A->rmap->rstart; 803 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 804 } 805 
ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 806 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 807 } else { 808 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 809 } 810 ierr = PetscFree(lrows);CHKERRQ(ierr); 811 812 /* only change matrix nonzero state if pattern was allowed to be changed */ 813 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 814 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 815 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 816 } 817 PetscFunctionReturn(0); 818 } 819 820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 821 { 822 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 823 PetscErrorCode ierr; 824 PetscMPIInt n = A->rmap->n; 825 PetscInt i,j,r,m,p = 0,len = 0; 826 PetscInt *lrows,*owners = A->rmap->range; 827 PetscSFNode *rrows; 828 PetscSF sf; 829 const PetscScalar *xx; 830 PetscScalar *bb,*mask; 831 Vec xmask,lmask; 832 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 833 const PetscInt *aj, *ii,*ridx; 834 PetscScalar *aa; 835 836 PetscFunctionBegin; 837 /* Create SF where leaves are input rows and roots are owned rows */ 838 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 839 for (r = 0; r < n; ++r) lrows[r] = -1; 840 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 841 for (r = 0; r < N; ++r) { 842 const PetscInt idx = rows[r]; 843 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 844 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 845 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 846 } 847 rrows[r].rank = p; 848 rrows[r].index = rows[r] - owners[p]; 849 } 850 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 851 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 852 /* Collect flags for rows to be zeroed */ 853 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 854 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 855 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 856 /* Compress and put in row numbers */ 857 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 858 /* zero diagonal part of matrix */ 859 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 860 /* handle off diagonal part of matrix */ 861 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 862 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 863 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 864 for (i=0; i<len; i++) bb[lrows[i]] = 1; 865 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 866 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 867 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 869 if (x) { 870 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 873 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 874 } 875 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 876 /* remove zeroed rows of off diagonal matrix */ 877 ii = aij->i; 878 for (i=0; i<len; i++) { 879 ierr = PetscMemzero(aij->a + 
ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 880 } 881 /* loop over all elements of off process part of matrix zeroing removed columns*/ 882 if (aij->compressedrow.use) { 883 m = aij->compressedrow.nrows; 884 ii = aij->compressedrow.i; 885 ridx = aij->compressedrow.rindex; 886 for (i=0; i<m; i++) { 887 n = ii[i+1] - ii[i]; 888 aj = aij->j + ii[i]; 889 aa = aij->a + ii[i]; 890 891 for (j=0; j<n; j++) { 892 if (PetscAbsScalar(mask[*aj])) { 893 if (b) bb[*ridx] -= *aa*xx[*aj]; 894 *aa = 0.0; 895 } 896 aa++; 897 aj++; 898 } 899 ridx++; 900 } 901 } else { /* do not use compressed row format */ 902 m = l->B->rmap->n; 903 for (i=0; i<m; i++) { 904 n = ii[i+1] - ii[i]; 905 aj = aij->j + ii[i]; 906 aa = aij->a + ii[i]; 907 for (j=0; j<n; j++) { 908 if (PetscAbsScalar(mask[*aj])) { 909 if (b) bb[i] -= *aa*xx[*aj]; 910 *aa = 0.0; 911 } 912 aa++; 913 aj++; 914 } 915 } 916 } 917 if (x) { 918 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 919 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 920 } 921 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 922 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 923 ierr = PetscFree(lrows);CHKERRQ(ierr); 924 925 /* only change matrix nonzero state if pattern was allowed to be changed */ 926 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 927 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 928 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 929 } 930 PetscFunctionReturn(0); 931 } 932 933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 934 { 935 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 936 PetscErrorCode ierr; 937 PetscInt nt; 938 VecScatter Mvctx = a->Mvctx; 939 940 PetscFunctionBegin; 941 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 942 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 943 944 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 945 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 946 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 947 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 948 PetscFunctionReturn(0); 949 } 950 951 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 952 { 953 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 954 PetscErrorCode ierr; 955 956 PetscFunctionBegin; 957 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 958 PetscFunctionReturn(0); 959 } 960 961 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 962 { 963 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 964 PetscErrorCode ierr; 965 966 PetscFunctionBegin; 967 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 968 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 969 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 970 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 971 PetscFunctionReturn(0); 972 } 973 974 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 975 { 976 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 977 PetscErrorCode ierr; 978 PetscBool merged; 979 980 PetscFunctionBegin; 981 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 982 /* do nondiagonal part */ 983 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 984 if (!merged) { 985 /* send it on its way */ 986 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 987 /* do local part */ 988 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 989 /* receive remote parts: note this assumes the values are not actually */ 990 /* added in yy until the next line, */ 991 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 992 } else { 993 /* do local part */ 994 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 995 /* send it on its way */ 996 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 997 /* values actually were received in the Begin() but we need to call this nop */ 998 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 999 } 1000 PetscFunctionReturn(0); 1001 } 1002 1003 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1004 { 1005 MPI_Comm comm; 1006 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1007 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1008 IS Me,Notme; 1009 PetscErrorCode ierr; 1010 PetscInt M,N,first,last,*notme,i; 1011 PetscMPIInt size; 1012 1013 PetscFunctionBegin; 1014 /* Easy test: symmetric diagonal block */ 1015 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1016 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1017 if (!*f) PetscFunctionReturn(0); 1018 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1019 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1020 if (size == 1) PetscFunctionReturn(0); 1021 1022 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1023 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1024 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1025 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1026 for (i=0; i<first; i++) notme[i] = i; 1027 for (i=last; i<M; i++) notme[i-last+first] = i; 1028 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1029 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1030 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1031 Aoff = Aoffs[0]; 1032 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1033 Boff = Boffs[0]; 1034 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1035 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1036 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1037 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1038 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1039 ierr = PetscFree(notme);CHKERRQ(ierr); 1040 PetscFunctionReturn(0); 1041 } 1042 1043 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1044 { 1045 PetscErrorCode ierr; 1046 1047 PetscFunctionBegin; 1048 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1055 PetscErrorCode ierr; 1056 1057 PetscFunctionBegin; 1058 /* do nondiagonal part */ 1059 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1060 /* send it on its way */ 1061 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1062 /* do local part */ 1063 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1064 /* receive remote parts */ 1065 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1066 PetscFunctionReturn(0); 1067 } 1068 
1069 /* 1070 This only works correctly for square matrices where the subblock A->A is the 1071 diagonal block 1072 */ 1073 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1074 { 1075 PetscErrorCode ierr; 1076 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1077 1078 PetscFunctionBegin; 1079 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1080 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1081 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1082 PetscFunctionReturn(0); 1083 } 1084 1085 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1086 { 1087 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1088 PetscErrorCode ierr; 1089 1090 PetscFunctionBegin; 1091 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1092 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1097 { 1098 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1099 PetscErrorCode ierr; 1100 1101 PetscFunctionBegin; 1102 #if defined(PETSC_USE_LOG) 1103 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1104 #endif 1105 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1106 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1107 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1108 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1109 #if defined(PETSC_USE_CTABLE) 1110 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1111 #else 1112 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1113 #endif 1114 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1115 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1116 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1117 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1118 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1119 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1120 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1121 1122 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1123 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1125 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1126 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1127 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1128 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1129 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1130 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1131 #if defined(PETSC_HAVE_ELEMENTAL) 1132 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1133 #endif 1134 #if defined(PETSC_HAVE_HYPRE) 1135 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1136 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1137 #endif 1138 PetscFunctionReturn(0); 1139 } 1140 1141 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1142 { 1143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1144 
Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1145 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1146 PetscErrorCode ierr; 1147 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1148 int fd; 1149 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1150 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1151 PetscScalar *column_values; 1152 PetscInt message_count,flowcontrolcount; 1153 FILE *file; 1154 1155 PetscFunctionBegin; 1156 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1157 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1158 nz = A->nz + B->nz; 1159 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1160 if (!rank) { 1161 header[0] = MAT_FILE_CLASSID; 1162 header[1] = mat->rmap->N; 1163 header[2] = mat->cmap->N; 1164 1165 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1166 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1167 /* get largest number of rows any processor has */ 1168 rlen = mat->rmap->n; 1169 range = mat->rmap->range; 1170 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1171 } else { 1172 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1173 rlen = mat->rmap->n; 1174 } 1175 1176 /* load up the local row counts */ 1177 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1178 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1179 1180 /* store the row lengths to the file */ 1181 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1182 if (!rank) { 1183 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1184 for (i=1; i<size; i++) { 1185 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1186 rlen = range[i+1] - range[i]; 1187 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1188 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1189 } 1190 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1191 } else { 1192 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1193 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1194 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1195 } 1196 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1197 1198 /* load up the local column indices */ 1199 nzmax = nz; /* th processor needs space a largest processor needs */ 1200 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1201 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1202 cnt = 0; 1203 for (i=0; i<mat->rmap->n; i++) { 1204 for (j=B->i[i]; j<B->i[i+1]; j++) { 1205 if ((col = garray[B->j[j]]) > cstart) break; 1206 column_indices[cnt++] = col; 1207 } 1208 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1209 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1210 } 1211 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1212 1213 /* store the column indices to the file */ 1214 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1215 if (!rank) { 1216 MPI_Status status; 1217 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1218 for (i=1; i<size; i++) { 1219 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1220 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1221 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1222 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1223 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1224 } 1225 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1226 } else { 1227 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1228 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1229 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1230 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1231 } 1232 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1233 1234 /* load up the local column values */ 1235 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1236 cnt = 0; 1237 for (i=0; i<mat->rmap->n; i++) { 1238 for (j=B->i[i]; j<B->i[i+1]; j++) { 1239 if (garray[B->j[j]] > cstart) break; 1240 column_values[cnt++] = B->a[j]; 1241 } 1242 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1243 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1244 } 1245 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1246 1247 /* store the column values to the file */ 1248 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1249 if (!rank) { 1250 MPI_Status status; 1251 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1252 for (i=1; i<size; i++) { 1253 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1254 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1255 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1256 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1257 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1258 } 1259 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1260 } else { 1261 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1262 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1263 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1264 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1265 } 1266 ierr = PetscFree(column_values);CHKERRQ(ierr); 1267 1268 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1269 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1270 PetscFunctionReturn(0); 1271 } 1272 1273 #include <petscdraw.h> 1274 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1275 { 1276 
Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1277 PetscErrorCode ierr; 1278 PetscMPIInt rank = aij->rank,size = aij->size; 1279 PetscBool isdraw,iascii,isbinary; 1280 PetscViewer sviewer; 1281 PetscViewerFormat format; 1282 1283 PetscFunctionBegin; 1284 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1285 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1286 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1287 if (iascii) { 1288 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1289 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1290 MatInfo info; 1291 PetscBool inodes; 1292 1293 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1294 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1295 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1296 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1297 if (!inodes) { 1298 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1299 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1300 } else { 1301 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1302 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1303 } 1304 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1305 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1306 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1307 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1308 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1309 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1310 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1311 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1312 PetscFunctionReturn(0); 1313 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1314 PetscInt inodecount,inodelimit,*inodes; 1315 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1316 if (inodes) { 1317 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1318 } else { 1319 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1320 } 1321 PetscFunctionReturn(0); 1322 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1323 PetscFunctionReturn(0); 1324 } 1325 } else if (isbinary) { 1326 if (size == 1) { 1327 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1328 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1329 } else { 1330 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1331 } 1332 PetscFunctionReturn(0); 1333 } else if (isdraw) { 1334 PetscDraw draw; 1335 PetscBool isnull; 1336 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1337 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1338 if (isnull) PetscFunctionReturn(0); 1339 } 1340 1341 { 1342 /* assemble the entire matrix onto first processor. 
*/ 1343 Mat A; 1344 Mat_SeqAIJ *Aloc; 1345 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1346 MatScalar *a; 1347 1348 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1349 if (!rank) { 1350 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1351 } else { 1352 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1353 } 1354 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1355 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1356 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1357 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1358 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1359 1360 /* copy over the A part */ 1361 Aloc = (Mat_SeqAIJ*)aij->A->data; 1362 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1363 row = mat->rmap->rstart; 1364 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1365 for (i=0; i<m; i++) { 1366 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1367 row++; 1368 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1369 } 1370 aj = Aloc->j; 1371 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1372 1373 /* copy over the B part */ 1374 Aloc = (Mat_SeqAIJ*)aij->B->data; 1375 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1376 row = mat->rmap->rstart; 1377 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1378 ct = cols; 1379 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1380 for (i=0; i<m; i++) { 1381 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1382 row++; 1383 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1384 } 1385 ierr = PetscFree(ct);CHKERRQ(ierr); 1386 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1387 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1388 /* 1389 Everyone has to call to draw the matrix since the graphics waits are 1390 synchronized across all processors that share the PetscDraw object 1391 */ 1392 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1393 if (!rank) { 1394 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1395 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1396 } 1397 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1398 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1399 ierr = MatDestroy(&A);CHKERRQ(ierr); 1400 } 1401 PetscFunctionReturn(0); 1402 } 1403 1404 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1405 { 1406 PetscErrorCode ierr; 1407 PetscBool iascii,isdraw,issocket,isbinary; 1408 1409 PetscFunctionBegin; 1410 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1411 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1412 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1413 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1414 if (iascii || isdraw || isbinary || issocket) { 1415 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1416 } 1417 PetscFunctionReturn(0); 1418 } 1419 1420 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1421 { 1422 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1423 PetscErrorCode ierr; 1424 Vec bb1 = 0; 1425 PetscBool hasop; 1426 
1427 PetscFunctionBegin; 1428 if (flag == SOR_APPLY_UPPER) { 1429 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1430 PetscFunctionReturn(0); 1431 } 1432 1433 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1434 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1435 } 1436 1437 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1438 if (flag & SOR_ZERO_INITIAL_GUESS) { 1439 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1440 its--; 1441 } 1442 1443 while (its--) { 1444 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1445 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1446 1447 /* update rhs: bb1 = bb - B*x */ 1448 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1449 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1450 1451 /* local sweep */ 1452 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1453 } 1454 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1455 if (flag & SOR_ZERO_INITIAL_GUESS) { 1456 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1457 its--; 1458 } 1459 while (its--) { 1460 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1461 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1462 1463 /* update rhs: bb1 = bb - B*x */ 1464 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1465 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1466 1467 /* local sweep */ 1468 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1469 } 1470 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1471 if (flag & SOR_ZERO_INITIAL_GUESS) { 1472 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1473 its--; 1474 } 1475 while (its--) { 1476 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1477 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1478 1479 /* update rhs: bb1 = bb - B*x */ 1480 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1481 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1482 1483 /* local sweep */ 1484 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1485 } 1486 } else if (flag & SOR_EISENSTAT) { 1487 Vec xx1; 1488 1489 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1490 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1491 1492 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1493 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1494 if (!mat->diag) { 1495 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1496 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1497 } 1498 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1499 if (hasop) { 1500 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1501 } else { 1502 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1503 } 1504 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1505 1506 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1507 1508 /* local sweep */ 1509 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1510 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1511 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1512 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1513 1514 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1515 1516 matin->factorerrortype = mat->A->factorerrortype; 1517 PetscFunctionReturn(0); 1518 } 1519 1520 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1521 { 1522 Mat aA,aB,Aperm; 1523 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1524 PetscScalar *aa,*ba; 1525 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1526 PetscSF rowsf,sf; 1527 IS parcolp = NULL; 1528 PetscBool done; 1529 PetscErrorCode ierr; 1530 1531 PetscFunctionBegin; 1532 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1533 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1534 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1535 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1536 1537 /* Invert row permutation to find out where my rows should go */ 1538 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1539 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1540 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1541 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1542 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1543 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1544 1545 /* Invert column permutation to find out where my columns should go */ 1546 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1547 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1548 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1549 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1550 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1551 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1552 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1553 1554 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1555 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1556 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1557 1558 /* Find out where my gcols should go */ 1559 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1560 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1561 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1562 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1563 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1564 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1565 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1566 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1567 1568 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1569 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1570 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1571 for (i=0; i<m; i++) { 1572 PetscInt row = rdest[i],rowner; 1573 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1574 for (j=ai[i]; j<ai[i+1]; j++) { 1575 PetscInt cowner,col = cdest[aj[j]]; 1576 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to 
eliminate this search */ 1577 if (rowner == cowner) dnnz[i]++; 1578 else onnz[i]++; 1579 } 1580 for (j=bi[i]; j<bi[i+1]; j++) { 1581 PetscInt cowner,col = gcdest[bj[j]]; 1582 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1583 if (rowner == cowner) dnnz[i]++; 1584 else onnz[i]++; 1585 } 1586 } 1587 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1588 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1589 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1590 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1591 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1592 1593 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1594 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1595 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1596 for (i=0; i<m; i++) { 1597 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1598 PetscInt j0,rowlen; 1599 rowlen = ai[i+1] - ai[i]; 1600 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1601 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1602 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1603 } 1604 rowlen = bi[i+1] - bi[i]; 1605 for (j0=j=0; j<rowlen; j0=j) { 1606 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1607 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1608 } 1609 } 1610 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1611 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1612 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1613 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1614 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1615 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1616 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1617 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1618 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1619 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1620 *B = Aperm; 1621 PetscFunctionReturn(0); 1622 } 1623 1624 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1625 { 1626 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1627 PetscErrorCode ierr; 1628 1629 PetscFunctionBegin; 1630 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1631 if (ghosts) *ghosts = aij->garray; 1632 PetscFunctionReturn(0); 1633 } 1634 1635 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1636 { 1637 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1638 Mat A = mat->A,B = mat->B; 1639 PetscErrorCode ierr; 1640 PetscReal isend[5],irecv[5]; 1641 1642 PetscFunctionBegin; 1643 info->block_size = 1.0; 1644 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1645 1646 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1647 isend[3] = info->memory; isend[4] = info->mallocs; 1648 1649 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1650 1651 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1652 isend[3] += info->memory; isend[4] += info->mallocs; 1653 if (flag == MAT_LOCAL) { 1654 info->nz_used = isend[0]; 1655 info->nz_allocated = isend[1]; 1656 info->nz_unneeded = isend[2]; 1657 info->memory = isend[3]; 1658 info->mallocs = 
isend[4]; 1659 } else if (flag == MAT_GLOBAL_MAX) { 1660 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1661 1662 info->nz_used = irecv[0]; 1663 info->nz_allocated = irecv[1]; 1664 info->nz_unneeded = irecv[2]; 1665 info->memory = irecv[3]; 1666 info->mallocs = irecv[4]; 1667 } else if (flag == MAT_GLOBAL_SUM) { 1668 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1669 1670 info->nz_used = irecv[0]; 1671 info->nz_allocated = irecv[1]; 1672 info->nz_unneeded = irecv[2]; 1673 info->memory = irecv[3]; 1674 info->mallocs = irecv[4]; 1675 } 1676 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1677 info->fill_ratio_needed = 0; 1678 info->factor_mallocs = 0; 1679 PetscFunctionReturn(0); 1680 } 1681 1682 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1683 { 1684 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1685 PetscErrorCode ierr; 1686 1687 PetscFunctionBegin; 1688 switch (op) { 1689 case MAT_NEW_NONZERO_LOCATIONS: 1690 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1691 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1692 case MAT_KEEP_NONZERO_PATTERN: 1693 case MAT_NEW_NONZERO_LOCATION_ERR: 1694 case MAT_USE_INODES: 1695 case MAT_IGNORE_ZERO_ENTRIES: 1696 MatCheckPreallocated(A,1); 1697 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1698 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1699 break; 1700 case MAT_ROW_ORIENTED: 1701 MatCheckPreallocated(A,1); 1702 a->roworiented = flg; 1703 1704 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1705 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1706 break; 1707 case MAT_NEW_DIAGONALS: 1708 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1709 break; 1710 case MAT_IGNORE_OFF_PROC_ENTRIES: 1711 a->donotstash = flg; 1712 break; 1713 case MAT_SPD: 1714 A->spd_set = PETSC_TRUE; 1715 A->spd = flg; 1716 if (flg) { 1717 A->symmetric = PETSC_TRUE; 1718 A->structurally_symmetric = PETSC_TRUE; 1719 A->symmetric_set = PETSC_TRUE; 1720 A->structurally_symmetric_set = PETSC_TRUE; 1721 } 1722 break; 1723 case MAT_SYMMETRIC: 1724 MatCheckPreallocated(A,1); 1725 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1726 break; 1727 case MAT_STRUCTURALLY_SYMMETRIC: 1728 MatCheckPreallocated(A,1); 1729 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1730 break; 1731 case MAT_HERMITIAN: 1732 MatCheckPreallocated(A,1); 1733 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1734 break; 1735 case MAT_SYMMETRY_ETERNAL: 1736 MatCheckPreallocated(A,1); 1737 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1738 break; 1739 case MAT_SUBMAT_SINGLEIS: 1740 A->submat_singleis = flg; 1741 break; 1742 case MAT_STRUCTURE_ONLY: 1743 /* The option is handled directly by MatSetOption() */ 1744 break; 1745 default: 1746 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1747 } 1748 PetscFunctionReturn(0); 1749 } 1750 1751 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1752 { 1753 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1754 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1755 PetscErrorCode ierr; 1756 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1757 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1758 PetscInt *cmap,*idx_p; 1759 1760 PetscFunctionBegin; 1761 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1762 mat->getrowactive = PETSC_TRUE; 1763 1764 if (!mat->rowvalues && 
(idx || v)) { 1765 /* 1766 allocate enough space to hold information from the longest row. 1767 */ 1768 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1769 PetscInt max = 1,tmp; 1770 for (i=0; i<matin->rmap->n; i++) { 1771 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1772 if (max < tmp) max = tmp; 1773 } 1774 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1775 } 1776 1777 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1778 lrow = row - rstart; 1779 1780 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1781 if (!v) {pvA = 0; pvB = 0;} 1782 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1783 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1784 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1785 nztot = nzA + nzB; 1786 1787 cmap = mat->garray; 1788 if (v || idx) { 1789 if (nztot) { 1790 /* Sort by increasing column numbers, assuming A and B already sorted */ 1791 PetscInt imark = -1; 1792 if (v) { 1793 *v = v_p = mat->rowvalues; 1794 for (i=0; i<nzB; i++) { 1795 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1796 else break; 1797 } 1798 imark = i; 1799 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1800 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1801 } 1802 if (idx) { 1803 *idx = idx_p = mat->rowindices; 1804 if (imark > -1) { 1805 for (i=0; i<imark; i++) { 1806 idx_p[i] = cmap[cworkB[i]]; 1807 } 1808 } else { 1809 for (i=0; i<nzB; i++) { 1810 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1811 else break; 1812 } 1813 imark = i; 1814 } 1815 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1816 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1817 } 1818 } else { 1819 if (idx) *idx = 0; 1820 if (v) *v = 0; 1821 } 1822 } 1823 *nz = nztot; 1824 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1825 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1826 PetscFunctionReturn(0); 1827 } 1828 1829 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1830 { 1831 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1832 1833 PetscFunctionBegin; 1834 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1835 aij->getrowactive = PETSC_FALSE; 1836 PetscFunctionReturn(0); 1837 } 1838 1839 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1840 { 1841 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1842 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1843 PetscErrorCode ierr; 1844 PetscInt i,j,cstart = mat->cmap->rstart; 1845 PetscReal sum = 0.0; 1846 MatScalar *v; 1847 1848 PetscFunctionBegin; 1849 if (aij->size == 1) { 1850 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1851 } else { 1852 if (type == NORM_FROBENIUS) { 1853 v = amat->a; 1854 for (i=0; i<amat->nz; i++) { 1855 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1856 } 1857 v = bmat->a; 1858 for (i=0; i<bmat->nz; i++) { 1859 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1860 } 1861 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1862 *norm = PetscSqrtReal(*norm); 1863 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1864 } else if (type == NORM_1) { /* max column norm */ 1865 PetscReal *tmp,*tmp2; 1866 PetscInt *jj,*garray = aij->garray; 1867 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1868 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1869 *norm = 0.0; 1870 v = amat->a; jj = amat->j; 1871 for (j=0; j<amat->nz; j++) { 1872 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1873 } 1874 v = bmat->a; jj = bmat->j; 1875 for (j=0; j<bmat->nz; j++) { 1876 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1877 } 1878 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1879 for (j=0; j<mat->cmap->N; j++) { 1880 if (tmp2[j] > *norm) *norm = tmp2[j]; 1881 } 1882 ierr = PetscFree(tmp);CHKERRQ(ierr); 1883 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1884 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1885 } else if (type == NORM_INFINITY) { /* max row norm */ 1886 PetscReal ntemp = 0.0; 1887 for (j=0; j<aij->A->rmap->n; j++) { 1888 v = amat->a + amat->i[j]; 1889 sum = 0.0; 1890 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1891 sum += PetscAbsScalar(*v); v++; 1892 } 1893 v = bmat->a + bmat->i[j]; 1894 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1895 sum += PetscAbsScalar(*v); v++; 1896 } 1897 if (sum > ntemp) ntemp = sum; 1898 } 1899 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1900 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1901 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1902 } 1903 PetscFunctionReturn(0); 1904 } 1905 1906 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1907 { 1908 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1909 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1910 PetscErrorCode ierr; 1911 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1912 PetscInt cstart = A->cmap->rstart,ncol; 1913 Mat B; 1914 MatScalar *array; 1915 1916 PetscFunctionBegin; 1917 if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1918 1919 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1920 ai = Aloc->i; aj = Aloc->j; 1921 bi = Bloc->i; bj = Bloc->j; 1922 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1923 PetscInt *d_nnz,*g_nnz,*o_nnz; 1924 PetscSFNode *oloc; 1925 PETSC_UNUSED PetscSF sf; 1926 1927 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1928 /* compute d_nnz for preallocation */ 1929 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1930 for (i=0; i<ai[ma]; i++) { 1931 d_nnz[aj[i]]++; 1932 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1933 } 1934 /* compute local off-diagonal contributions */ 1935 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1936 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1937 /* map those to global */ 1938 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1939 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1940 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1941 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1942 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1943 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1944 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1945 1946 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1947 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1948 
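  /* note the swapped sizes: the transpose has A->cmap->n local rows and A->rmap->n local columns,
     and the row/column block sizes are exchanged on the next line as well */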
ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1949 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1950 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1951 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1952 } else { 1953 B = *matout; 1954 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1955 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1956 } 1957 1958 /* copy over the A part */ 1959 array = Aloc->a; 1960 row = A->rmap->rstart; 1961 for (i=0; i<ma; i++) { 1962 ncol = ai[i+1]-ai[i]; 1963 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1964 row++; 1965 array += ncol; aj += ncol; 1966 } 1967 aj = Aloc->j; 1968 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1969 1970 /* copy over the B part */ 1971 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1972 array = Bloc->a; 1973 row = A->rmap->rstart; 1974 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1975 cols_tmp = cols; 1976 for (i=0; i<mb; i++) { 1977 ncol = bi[i+1]-bi[i]; 1978 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1979 row++; 1980 array += ncol; cols_tmp += ncol; 1981 } 1982 ierr = PetscFree(cols);CHKERRQ(ierr); 1983 1984 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1985 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1986 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1987 *matout = B; 1988 } else { 1989 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1990 } 1991 PetscFunctionReturn(0); 1992 } 1993 1994 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1995 { 1996 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1997 Mat a = aij->A,b = aij->B; 1998 PetscErrorCode ierr; 1999 PetscInt s1,s2,s3; 2000 2001 PetscFunctionBegin; 2002 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2003 if (rr) { 2004 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2005 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2006 /* Overlap communication with computation. 
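     The forward scatter of rr into aij->lvec is started here and completed only after the
     diagonal block has been scaled below, so the communication is hidden behind local work.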
*/ 2007 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2008 } 2009 if (ll) { 2010 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2011 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2012 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2013 } 2014 /* scale the diagonal block */ 2015 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2016 2017 if (rr) { 2018 /* Do a scatter end and then right scale the off-diagonal block */ 2019 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2020 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2021 } 2022 PetscFunctionReturn(0); 2023 } 2024 2025 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2026 { 2027 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2028 PetscErrorCode ierr; 2029 2030 PetscFunctionBegin; 2031 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2032 PetscFunctionReturn(0); 2033 } 2034 2035 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2036 { 2037 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2038 Mat a,b,c,d; 2039 PetscBool flg; 2040 PetscErrorCode ierr; 2041 2042 PetscFunctionBegin; 2043 a = matA->A; b = matA->B; 2044 c = matB->A; d = matB->B; 2045 2046 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2047 if (flg) { 2048 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2049 } 2050 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2051 PetscFunctionReturn(0); 2052 } 2053 2054 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2055 { 2056 PetscErrorCode ierr; 2057 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2058 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2059 2060 PetscFunctionBegin; 2061 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2062 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2063 /* because of the column compression in the off-processor part of the matrix a->B, 2064 the number of columns in a->B and b->B may be different, hence we cannot call 2065 the MatCopy() directly on the two parts. If need be, we can provide a more 2066 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2067 then copying the submatrices */ 2068 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2069 } else { 2070 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2071 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2072 } 2073 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2074 PetscFunctionReturn(0); 2075 } 2076 2077 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2078 { 2079 PetscErrorCode ierr; 2080 2081 PetscFunctionBegin; 2082 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2083 PetscFunctionReturn(0); 2084 } 2085 2086 /* 2087 Computes the number of nonzeros per row needed for preallocation when X and Y 2088 have different nonzero structure. 
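   The count for each row is the size of the union of the two (sorted, globally numbered) column
   sets, computed by a simple merge.  For example, if a row of X has global columns {0,3,7} and the
   corresponding row of Y has {3,5}, the merged row has columns {0,3,5,7}, so nnz for that row is 4.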
2089 */ 2090 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2091 { 2092 PetscInt i,j,k,nzx,nzy; 2093 2094 PetscFunctionBegin; 2095 /* Set the number of nonzeros in the new matrix */ 2096 for (i=0; i<m; i++) { 2097 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2098 nzx = xi[i+1] - xi[i]; 2099 nzy = yi[i+1] - yi[i]; 2100 nnz[i] = 0; 2101 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2102 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2103 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2104 nnz[i]++; 2105 } 2106 for (; k<nzy; k++) nnz[i]++; 2107 } 2108 PetscFunctionReturn(0); 2109 } 2110 2111 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2112 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2113 { 2114 PetscErrorCode ierr; 2115 PetscInt m = Y->rmap->N; 2116 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2117 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2118 2119 PetscFunctionBegin; 2120 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2121 PetscFunctionReturn(0); 2122 } 2123 2124 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2125 { 2126 PetscErrorCode ierr; 2127 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2128 PetscBLASInt bnz,one=1; 2129 Mat_SeqAIJ *x,*y; 2130 2131 PetscFunctionBegin; 2132 if (str == SAME_NONZERO_PATTERN) { 2133 PetscScalar alpha = a; 2134 x = (Mat_SeqAIJ*)xx->A->data; 2135 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2136 y = (Mat_SeqAIJ*)yy->A->data; 2137 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2138 x = (Mat_SeqAIJ*)xx->B->data; 2139 y = (Mat_SeqAIJ*)yy->B->data; 2140 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2141 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2142 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2143 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2144 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2145 } else { 2146 Mat B; 2147 PetscInt *nnz_d,*nnz_o; 2148 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2149 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2150 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2151 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2152 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2153 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2154 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2155 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2156 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2157 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2158 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2159 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2160 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2161 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2162 } 2163 PetscFunctionReturn(0); 2164 } 2165 2166 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2167 2168 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2169 { 2170 #if defined(PETSC_USE_COMPLEX) 2171 PetscErrorCode ierr; 2172 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    /* v and idx have one entry per local row, so the loop runs over A->rmap->n */
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  /* the work vectors are purely local, so create them on PETSC_COMM_SELF */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
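  /* Delegate to the sequential diagonal (aij->A) and off-diagonal (aij->B) blocks and then
     re-assemble so the parallel matrix is left in a valid assembled state.  A minimal usage
     sketch, assuming an already created and preallocated MPIAIJ matrix A:

       ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr);
       ierr = MatSetRandom(A,rctx);CHKERRQ(ierr);
       ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
  */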
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2389 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2390 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2391 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2392 PetscFunctionReturn(0); 2393 } 2394 2395 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2396 { 2397 PetscFunctionBegin; 2398 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2399 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2400 PetscFunctionReturn(0); 2401 } 2402 2403 /*@ 2404 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2405 2406 Collective on Mat 2407 2408 Input Parameters: 2409 + A - the matrix 2410 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2411 2412 Level: advanced 2413 2414 @*/ 2415 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2416 { 2417 PetscErrorCode ierr; 2418 2419 PetscFunctionBegin; 2420 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2421 PetscFunctionReturn(0); 2422 } 2423 2424 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2425 { 2426 PetscErrorCode ierr; 2427 PetscBool sc = PETSC_FALSE,flg; 2428 2429 PetscFunctionBegin; 2430 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2431 ierr = PetscObjectOptionsBegin((PetscObject)A); 2432 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2433 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2434 if (flg) { 2435 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2436 } 2437 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2438 PetscFunctionReturn(0); 2439 } 2440 2441 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2442 { 2443 PetscErrorCode ierr; 2444 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2445 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2446 2447 PetscFunctionBegin; 2448 if (!Y->preallocated) { 2449 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2450 } else if (!aij->nz) { 2451 PetscInt nonew = aij->nonew; 2452 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2453 aij->nonew = nonew; 2454 } 2455 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2456 PetscFunctionReturn(0); 2457 } 2458 2459 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2460 { 2461 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2462 PetscErrorCode ierr; 2463 2464 PetscFunctionBegin; 2465 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2466 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2467 if (d) { 2468 PetscInt rstart; 2469 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2470 *d += rstart; 2471 2472 } 2473 PetscFunctionReturn(0); 2474 } 2475 2476 2477 /* -------------------------------------------------------------------*/ 2478 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2479 MatGetRow_MPIAIJ, 2480 MatRestoreRow_MPIAIJ, 2481 MatMult_MPIAIJ, 2482 /* 4*/ MatMultAdd_MPIAIJ, 2483 MatMultTranspose_MPIAIJ, 2484 MatMultTransposeAdd_MPIAIJ, 2485 0, 2486 0, 2487 0, 2488 /*10*/ 0, 2489 0, 2490 0, 2491 MatSOR_MPIAIJ, 2492 MatTranspose_MPIAIJ, 2493 /*15*/ MatGetInfo_MPIAIJ, 2494 MatEqual_MPIAIJ, 2495 
MatGetDiagonal_MPIAIJ, 2496 MatDiagonalScale_MPIAIJ, 2497 MatNorm_MPIAIJ, 2498 /*20*/ MatAssemblyBegin_MPIAIJ, 2499 MatAssemblyEnd_MPIAIJ, 2500 MatSetOption_MPIAIJ, 2501 MatZeroEntries_MPIAIJ, 2502 /*24*/ MatZeroRows_MPIAIJ, 2503 0, 2504 0, 2505 0, 2506 0, 2507 /*29*/ MatSetUp_MPIAIJ, 2508 0, 2509 0, 2510 MatGetDiagonalBlock_MPIAIJ, 2511 0, 2512 /*34*/ MatDuplicate_MPIAIJ, 2513 0, 2514 0, 2515 0, 2516 0, 2517 /*39*/ MatAXPY_MPIAIJ, 2518 MatCreateSubMatrices_MPIAIJ, 2519 MatIncreaseOverlap_MPIAIJ, 2520 MatGetValues_MPIAIJ, 2521 MatCopy_MPIAIJ, 2522 /*44*/ MatGetRowMax_MPIAIJ, 2523 MatScale_MPIAIJ, 2524 MatShift_MPIAIJ, 2525 MatDiagonalSet_MPIAIJ, 2526 MatZeroRowsColumns_MPIAIJ, 2527 /*49*/ MatSetRandom_MPIAIJ, 2528 0, 2529 0, 2530 0, 2531 0, 2532 /*54*/ MatFDColoringCreate_MPIXAIJ, 2533 0, 2534 MatSetUnfactored_MPIAIJ, 2535 MatPermute_MPIAIJ, 2536 0, 2537 /*59*/ MatCreateSubMatrix_MPIAIJ, 2538 MatDestroy_MPIAIJ, 2539 MatView_MPIAIJ, 2540 0, 2541 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2542 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2543 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2544 0, 2545 0, 2546 0, 2547 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2548 MatGetRowMinAbs_MPIAIJ, 2549 0, 2550 0, 2551 0, 2552 0, 2553 /*75*/ MatFDColoringApply_AIJ, 2554 MatSetFromOptions_MPIAIJ, 2555 0, 2556 0, 2557 MatFindZeroDiagonals_MPIAIJ, 2558 /*80*/ 0, 2559 0, 2560 0, 2561 /*83*/ MatLoad_MPIAIJ, 2562 MatIsSymmetric_MPIAIJ, 2563 0, 2564 0, 2565 0, 2566 0, 2567 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2568 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2569 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2570 MatPtAP_MPIAIJ_MPIAIJ, 2571 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2572 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2573 0, 2574 0, 2575 0, 2576 0, 2577 /*99*/ 0, 2578 0, 2579 0, 2580 MatConjugate_MPIAIJ, 2581 0, 2582 /*104*/MatSetValuesRow_MPIAIJ, 2583 MatRealPart_MPIAIJ, 2584 MatImaginaryPart_MPIAIJ, 2585 0, 2586 0, 2587 /*109*/0, 2588 0, 2589 MatGetRowMin_MPIAIJ, 2590 0, 2591 MatMissingDiagonal_MPIAIJ, 2592 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2593 0, 2594 MatGetGhosts_MPIAIJ, 2595 0, 2596 0, 2597 /*119*/0, 2598 0, 2599 0, 2600 0, 2601 MatGetMultiProcBlock_MPIAIJ, 2602 /*124*/MatFindNonzeroRows_MPIAIJ, 2603 MatGetColumnNorms_MPIAIJ, 2604 MatInvertBlockDiagonal_MPIAIJ, 2605 0, 2606 MatCreateSubMatricesMPI_MPIAIJ, 2607 /*129*/0, 2608 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2609 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2610 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2611 0, 2612 /*134*/0, 2613 0, 2614 MatRARt_MPIAIJ_MPIAIJ, 2615 0, 2616 0, 2617 /*139*/MatSetBlockSizes_MPIAIJ, 2618 0, 2619 0, 2620 MatFDColoringSetUp_MPIXAIJ, 2621 MatFindOffBlockDiagonalEntries_MPIAIJ, 2622 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2623 }; 2624 2625 /* ----------------------------------------------------------------------------------------*/ 2626 2627 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2628 { 2629 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2630 PetscErrorCode ierr; 2631 2632 PetscFunctionBegin; 2633 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2634 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2635 PetscFunctionReturn(0); 2636 } 2637 2638 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2639 { 2640 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2641 PetscErrorCode ierr; 2642 2643 PetscFunctionBegin; 2644 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2645 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2646 PetscFunctionReturn(0); 2647 } 2648 2649 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt 
o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the off-diagonal matrix b->B will have been resized, we simply destroy it and create a new one each time */
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
  ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size = oldmat->size;
  a->rank = oldmat->rank;
  a->donotstash
= oldmat->donotstash; 2743 a->roworiented = oldmat->roworiented; 2744 a->rowindices = 0; 2745 a->rowvalues = 0; 2746 a->getrowactive = PETSC_FALSE; 2747 2748 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2749 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2750 2751 if (oldmat->colmap) { 2752 #if defined(PETSC_USE_CTABLE) 2753 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2754 #else 2755 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2756 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2757 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2758 #endif 2759 } else a->colmap = 0; 2760 if (oldmat->garray) { 2761 PetscInt len; 2762 len = oldmat->B->cmap->n; 2763 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2764 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2765 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2766 } else a->garray = 0; 2767 2768 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2769 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2770 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2771 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2772 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2773 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2774 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2775 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2776 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2777 *newmat = mat; 2778 PetscFunctionReturn(0); 2779 } 2780 2781 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2782 { 2783 PetscScalar *vals,*svals; 2784 MPI_Comm comm; 2785 PetscErrorCode ierr; 2786 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2787 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2788 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2789 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2790 PetscInt cend,cstart,n,*rowners; 2791 int fd; 2792 PetscInt bs = newMat->rmap->bs; 2793 2794 PetscFunctionBegin; 2795 /* force binary viewer to load .info file if it has not yet done so */ 2796 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2797 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2798 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2799 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2800 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2801 if (!rank) { 2802 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2803 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2804 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2805 } 2806 2807 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2808 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2809 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2810 if (bs < 0) bs = 1; 2811 2812 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2813 M = header[1]; N = 
header[2]; 2814 2815 /* If global sizes are set, check if they are consistent with that given in the file */ 2816 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2817 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2818 2819 /* determine ownership of all (block) rows */ 2820 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2821 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2822 else m = newMat->rmap->n; /* Set by user */ 2823 2824 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2825 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2826 2827 /* First process needs enough room for process with most rows */ 2828 if (!rank) { 2829 mmax = rowners[1]; 2830 for (i=2; i<=size; i++) { 2831 mmax = PetscMax(mmax, rowners[i]); 2832 } 2833 } else mmax = -1; /* unused, but compilers complain */ 2834 2835 rowners[0] = 0; 2836 for (i=2; i<=size; i++) { 2837 rowners[i] += rowners[i-1]; 2838 } 2839 rstart = rowners[rank]; 2840 rend = rowners[rank+1]; 2841 2842 /* distribute row lengths to all processors */ 2843 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2844 if (!rank) { 2845 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2846 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2847 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2848 for (j=0; j<m; j++) { 2849 procsnz[0] += ourlens[j]; 2850 } 2851 for (i=1; i<size; i++) { 2852 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2853 /* calculate the number of nonzeros on each processor */ 2854 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2855 procsnz[i] += rowlengths[j]; 2856 } 2857 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2858 } 2859 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2860 } else { 2861 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2862 } 2863 2864 if (!rank) { 2865 /* determine max buffer needed and allocate it */ 2866 maxnz = 0; 2867 for (i=0; i<size; i++) { 2868 maxnz = PetscMax(maxnz,procsnz[i]); 2869 } 2870 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2871 2872 /* read in my part of the matrix column indices */ 2873 nz = procsnz[0]; 2874 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2875 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2876 2877 /* read in every one elses and ship off */ 2878 for (i=1; i<size; i++) { 2879 nz = procsnz[i]; 2880 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2881 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2882 } 2883 ierr = PetscFree(cols);CHKERRQ(ierr); 2884 } else { 2885 /* determine buffer space needed for message */ 2886 nz = 0; 2887 for (i=0; i<m; i++) { 2888 nz += ourlens[i]; 2889 } 2890 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2891 2892 /* receive message of column indices*/ 2893 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2894 } 2895 2896 /* determine column ownership if matrix is not square */ 2897 if (N != M) { 2898 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2899 else n = newMat->cmap->n; 2900 ierr = 
MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2901 cstart = cend - n; 2902 } else { 2903 cstart = rstart; 2904 cend = rend; 2905 n = cend - cstart; 2906 } 2907 2908 /* loop over local rows, determining number of off diagonal entries */ 2909 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2910 jj = 0; 2911 for (i=0; i<m; i++) { 2912 for (j=0; j<ourlens[i]; j++) { 2913 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2914 jj++; 2915 } 2916 } 2917 2918 for (i=0; i<m; i++) { 2919 ourlens[i] -= offlens[i]; 2920 } 2921 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2922 2923 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2924 2925 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2926 2927 for (i=0; i<m; i++) { 2928 ourlens[i] += offlens[i]; 2929 } 2930 2931 if (!rank) { 2932 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2933 2934 /* read in my part of the matrix numerical values */ 2935 nz = procsnz[0]; 2936 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2937 2938 /* insert into matrix */ 2939 jj = rstart; 2940 smycols = mycols; 2941 svals = vals; 2942 for (i=0; i<m; i++) { 2943 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2944 smycols += ourlens[i]; 2945 svals += ourlens[i]; 2946 jj++; 2947 } 2948 2949 /* read in other processors and ship out */ 2950 for (i=1; i<size; i++) { 2951 nz = procsnz[i]; 2952 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2953 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2954 } 2955 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2956 } else { 2957 /* receive numeric values */ 2958 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 2959 2960 /* receive message of values*/ 2961 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2962 2963 /* insert into matrix */ 2964 jj = rstart; 2965 smycols = mycols; 2966 svals = vals; 2967 for (i=0; i<m; i++) { 2968 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2969 smycols += ourlens[i]; 2970 svals += ourlens[i]; 2971 jj++; 2972 } 2973 } 2974 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2975 ierr = PetscFree(vals);CHKERRQ(ierr); 2976 ierr = PetscFree(mycols);CHKERRQ(ierr); 2977 ierr = PetscFree(rowners);CHKERRQ(ierr); 2978 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2979 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2980 PetscFunctionReturn(0); 2981 } 2982 2983 /* Not scalable because of ISAllGather() unless getting all columns. 
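   ISAllGather() replicates the entire column index set on every process, so per-rank memory grows
   with the global number of selected columns.  The all-columns case is detected below and served
   with an identity stride IS instead, which avoids the gather entirely.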
*/ 2984 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2985 { 2986 PetscErrorCode ierr; 2987 IS iscol_local; 2988 PetscBool isstride; 2989 PetscMPIInt lisstride=0,gisstride; 2990 2991 PetscFunctionBegin; 2992 /* check if we are grabbing all columns*/ 2993 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 2994 2995 if (isstride) { 2996 PetscInt start,len,mstart,mlen; 2997 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 2998 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 2999 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3000 if (mstart == start && mlen-mstart == len) lisstride = 1; 3001 } 3002 3003 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3004 if (gisstride) { 3005 PetscInt N; 3006 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3007 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3008 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3009 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3010 } else { 3011 PetscInt cbs; 3012 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3013 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3014 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3015 } 3016 3017 *isseq = iscol_local; 3018 PetscFunctionReturn(0); 3019 } 3020 3021 /* 3022 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3023 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3024 3025 Input Parameters: 3026 mat - matrix 3027 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3028 i.e., mat->rstart <= isrow[i] < mat->rend 3029 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3030 i.e., mat->cstart <= iscol[i] < mat->cend 3031 Output Parameter: 3032 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3033 iscol_o - sequential column index set for retrieving mat->B 3034 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3035 */ 3036 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3037 { 3038 PetscErrorCode ierr; 3039 Vec x,cmap; 3040 const PetscInt *is_idx; 3041 PetscScalar *xarray,*cmaparray; 3042 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3043 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3044 Mat B=a->B; 3045 Vec lvec=a->lvec,lcmap; 3046 PetscInt i,cstart,cend,Bn=B->cmap->N; 3047 MPI_Comm comm; 3048 VecScatter Mvctx=a->Mvctx; 3049 3050 PetscFunctionBegin; 3051 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3052 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3053 3054 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3055 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3056 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3057 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3058 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3059 3060 /* Get start indices */ 3061 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3062 isstart -= ncols; 3063 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3064 3065 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3066 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3067 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3068 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3069 for (i=0; i<ncols; i++) { 3070 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3071 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3072 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3073 } 3074 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3075 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3076 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3077 3078 /* Get iscol_d */ 3079 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3080 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3081 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3082 3083 /* Get isrow_d */ 3084 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3085 rstart = mat->rmap->rstart; 3086 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3087 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3088 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3089 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3090 3091 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3092 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3093 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3094 3095 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3096 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3097 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3098 3099 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3100 3101 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3102 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3103 3104 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3105 /* off-process column indices */ 3106 count = 0; 3107 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3108 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3109 3110 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3111 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3112 for (i=0; i<Bn; i++) { 3113 if (PetscRealPart(xarray[i]) > -1.0) { 3114 idx[count] = i; /* local column index in off-diagonal part B */ 3115 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3116 count++; 3117 } 3118 } 3119 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3120 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3121 3122 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3123 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3124 3125 ierr = PetscFree(idx);CHKERRQ(ierr); 3126 *garray = cmap1; 3127 3128 ierr = VecDestroy(&x);CHKERRQ(ierr); 3129 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3130 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3131 PetscFunctionReturn(0); 3132 } 3133 3134 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3135 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3136 { 3137 PetscErrorCode ierr; 3138 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3139 Mat M = NULL; 3140 MPI_Comm comm; 3141 IS iscol_d,isrow_d,iscol_o; 3142 Mat Asub = NULL,Bsub = NULL; 3143 PetscInt n; 3144 3145 PetscFunctionBegin; 3146 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3147 3148 if (call == MAT_REUSE_MATRIX) { 3149 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3150 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3151 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3152 3153 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3154 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3155 3156 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3157 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3158 3159 /* Update diagonal and off-diagonal portions of submat */ 3160 asub = (Mat_MPIAIJ*)(*submat)->data; 3161 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3162 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3163 if (n) { 3164 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3165 } 3166 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3167 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3168 3169 } else { /* call == MAT_INITIAL_MATRIX) */ 3170 const PetscInt *garray; 3171 PetscInt BsubN; 3172 3173 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3174 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3175 3176 /* Create local submatrices Asub and Bsub */ 3177 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3178 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3179 3180 /* Create submatrix M */ 3181 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3182 3183 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3184 asub = (Mat_MPIAIJ*)M->data; 3185 3186 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3187 n = asub->B->cmap->N; 3188 if (BsubN > n) { 3189 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3190 const PetscInt *idx; 3191 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3192 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3193 3194 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3195 j = 0; 3196 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3197 for (i=0; i<n; i++) { 3198 if (j >= BsubN) break; 3199 while (subgarray[i] > garray[j]) j++; 3200 3201 if (subgarray[i] == garray[j]) { 3202 idx_new[i] = idx[j++]; 3203 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3204 } 3205 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3206 3207 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3208 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3209 3210 } else if (BsubN < n) { 3211 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3212 } 3213 3214 ierr = PetscFree(garray);CHKERRQ(ierr); 3215 *submat = M; 3216 3217 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3218 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3219 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3220 3221 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3222 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3223 3224 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3225 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3226 } 3227 PetscFunctionReturn(0); 3228 } 3229 3230 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3231 { 3232 PetscErrorCode ierr; 3233 IS iscol_local=NULL,isrow_d; 3234 PetscInt csize; 3235 PetscInt n,i,j,start,end; 3236 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3237 MPI_Comm comm; 3238 3239 PetscFunctionBegin; 3240 /* If isrow has same processor distribution as mat, 3241 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3242 if (call == MAT_REUSE_MATRIX) { 3243 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3244 if (isrow_d) { 3245 sameRowDist = PETSC_TRUE; 3246 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3247 } else { 3248 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3249 if (iscol_local) { 3250 sameRowDist = PETSC_TRUE; 3251 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3252 } 3253 } 3254 } else { 3255 /* Check if isrow has same processor distribution as mat */ 3256 sameDist[0] 
= PETSC_FALSE; 3257 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3258 if (!n) { 3259 sameDist[0] = PETSC_TRUE; 3260 } else { 3261 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3262 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3263 if (i >= start && j < end) { 3264 sameDist[0] = PETSC_TRUE; 3265 } 3266 } 3267 3268 /* Check if iscol has same processor distribution as mat */ 3269 sameDist[1] = PETSC_FALSE; 3270 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3271 if (!n) { 3272 sameDist[1] = PETSC_TRUE; 3273 } else { 3274 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3275 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3276 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3277 } 3278 3279 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3280 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3281 sameRowDist = tsameDist[0]; 3282 } 3283 3284 if (sameRowDist) { 3285 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3286 /* isrow and iscol have same processor distribution as mat */ 3287 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3288 PetscFunctionReturn(0); 3289 } else { /* sameRowDist */ 3290 /* isrow has same processor distribution as mat */ 3291 if (call == MAT_INITIAL_MATRIX) { 3292 PetscBool sorted; 3293 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3294 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3295 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3296 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3297 3298 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3299 if (sorted) { 3300 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3301 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3302 PetscFunctionReturn(0); 3303 } 3304 } else { /* call == MAT_REUSE_MATRIX */ 3305 IS iscol_sub; 3306 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3307 if (iscol_sub) { 3308 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3309 PetscFunctionReturn(0); 3310 } 3311 } 3312 } 3313 } 3314 3315 /* General case: iscol -> iscol_local which has global size of iscol */ 3316 if (call == MAT_REUSE_MATRIX) { 3317 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3318 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3319 } else { 3320 if (!iscol_local) { 3321 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3322 } 3323 } 3324 3325 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3326 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3327 3328 if (call == MAT_INITIAL_MATRIX) { 3329 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3330 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3331 } 3332 PetscFunctionReturn(0); 3333 } 3334 3335 /*@C 3336 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3337 and "off-diagonal" part of the matrix in CSR format. 3338 3339 Collective on MPI_Comm 3340 3341 Input Parameters: 3342 + comm - MPI communicator 3343 . 
A - "diagonal" portion of matrix 3344 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3345 - garray - global index of B columns 3346 3347 Output Parameter: 3348 . mat - the matrix, with input A as its local diagonal matrix 3349 Level: advanced 3350 3351 Notes: 3352 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3353 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3354 3355 .seealso: MatCreateMPIAIJWithSplitArrays() 3356 @*/ 3357 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3358 { 3359 PetscErrorCode ierr; 3360 Mat_MPIAIJ *maij; 3361 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3362 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3363 PetscScalar *oa=b->a; 3364 Mat Bnew; 3365 PetscInt m,n,N; 3366 3367 PetscFunctionBegin; 3368 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3369 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3370 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3371 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3372 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3373 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3374 3375 /* Get global columns of mat */ 3376 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3377 3378 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3379 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3380 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3381 maij = (Mat_MPIAIJ*)(*mat)->data; 3382 3383 (*mat)->preallocated = PETSC_TRUE; 3384 3385 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3386 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3387 3388 /* Set A as diagonal portion of *mat */ 3389 maij->A = A; 3390 3391 nz = oi[m]; 3392 for (i=0; i<nz; i++) { 3393 col = oj[i]; 3394 oj[i] = garray[col]; 3395 } 3396 3397 /* Set Bnew as off-diagonal portion of *mat */ 3398 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3399 bnew = (Mat_SeqAIJ*)Bnew->data; 3400 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3401 maij->B = Bnew; 3402 3403 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3404 3405 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3406 b->free_a = PETSC_FALSE; 3407 b->free_ij = PETSC_FALSE; 3408 ierr = MatDestroy(&B);CHKERRQ(ierr); 3409 3410 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3411 bnew->free_a = PETSC_TRUE; 3412 bnew->free_ij = PETSC_TRUE; 3413 3414 /* condense columns of maij->B */ 3415 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3416 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3417 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3418 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3419 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3420 PetscFunctionReturn(0); 3421 } 3422 3423 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3424 
3425 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3426 { 3427 PetscErrorCode ierr; 3428 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3429 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3430 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3431 Mat M,Msub,B=a->B; 3432 MatScalar *aa; 3433 Mat_SeqAIJ *aij; 3434 PetscInt *garray = a->garray,*colsub,Ncols; 3435 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3436 IS iscol_sub,iscmap; 3437 const PetscInt *is_idx,*cmap; 3438 PetscBool allcolumns=PETSC_FALSE; 3439 MPI_Comm comm; 3440 3441 PetscFunctionBegin; 3442 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3443 3444 if (call == MAT_REUSE_MATRIX) { 3445 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3446 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3447 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3448 3449 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3450 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3451 3452 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3453 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3454 3455 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3456 3457 } else { /* call == MAT_INITIAL_MATRIX) */ 3458 PetscBool flg; 3459 3460 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3461 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3462 3463 /* (1) iscol -> nonscalable iscol_local */ 3464 /* Check for special case: each processor gets entire matrix columns */ 3465 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3466 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3467 if (allcolumns) { 3468 iscol_sub = iscol_local; 3469 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3470 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3471 3472 } else { 3473 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3474 PetscInt *idx,*cmap1,k; 3475 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3476 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3477 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3478 count = 0; 3479 k = 0; 3480 for (i=0; i<Ncols; i++) { 3481 j = is_idx[i]; 3482 if (j >= cstart && j < cend) { 3483 /* diagonal part of mat */ 3484 idx[count] = j; 3485 cmap1[count++] = i; /* column index in submat */ 3486 } else if (Bn) { 3487 /* off-diagonal part of mat */ 3488 if (j == garray[k]) { 3489 idx[count] = j; 3490 cmap1[count++] = i; /* column index in submat */ 3491 } else if (j > garray[k]) { 3492 while (j > garray[k] && k < Bn-1) k++; 3493 if (j == garray[k]) { 3494 idx[count] = j; 3495 cmap1[count++] = i; /* column index in submat */ 3496 } 3497 } 3498 } 3499 } 3500 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3501 3502 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3503 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3504 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3505 3506 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3507 } 3508 3509 /* (3) Create sequential Msub */ 3510 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3511 } 3512 3513 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3514 aij = (Mat_SeqAIJ*)(Msub)->data; 3515 ii = aij->i; 3516 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3517 3518 /* 3519 m - number of local rows 3520 Ncols - number of columns (same on all processors) 3521 rstart - first row in new global matrix generated 3522 */ 3523 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3524 3525 if (call == MAT_INITIAL_MATRIX) { 3526 /* (4) Create parallel newmat */ 3527 PetscMPIInt rank,size; 3528 PetscInt csize; 3529 3530 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3531 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3532 3533 /* 3534 Determine the number of non-zeros in the diagonal and off-diagonal 3535 portions of the matrix in order to do correct preallocation 3536 */ 3537 3538 /* first get start and end of "diagonal" columns */ 3539 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3540 if (csize == PETSC_DECIDE) { 3541 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3542 if (mglobal == Ncols) { /* square matrix */ 3543 nlocal = m; 3544 } else { 3545 nlocal = Ncols/size + ((Ncols % size) > rank); 3546 } 3547 } else { 3548 nlocal = csize; 3549 } 3550 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3551 rstart = rend - nlocal; 3552 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3553 3554 /* next, compute all the lengths */ 3555 jj = aij->j; 3556 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3557 olens = dlens + m; 3558 for (i=0; i<m; i++) { 3559 jend = ii[i+1] - ii[i]; 3560 olen = 0; 3561 dlen = 0; 3562 for (j=0; j<jend; j++) { 3563 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3564 else dlen++; 3565 jj++; 3566 } 3567 olens[i] = olen; 3568 dlens[i] = dlen; 3569 } 3570 3571 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3572 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3573 3574 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3575 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
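/* dlens[] and olens[] computed above hold per-row nonzero counts for the diagonal and
   off-diagonal blocks of the new parallel matrix, so the preallocation below should need no
   additional mallocs when the values are inserted in step (5) */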
3576 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3577 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3578 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3579 ierr = PetscFree(dlens);CHKERRQ(ierr); 3580 3581 } else { /* call == MAT_REUSE_MATRIX */ 3582 M = *newmat; 3583 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3584 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3585 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3586 /* 3587 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3588 rather than the slower MatSetValues(). 3589 */ 3590 M->was_assembled = PETSC_TRUE; 3591 M->assembled = PETSC_FALSE; 3592 } 3593 3594 /* (5) Set values of Msub to *newmat */ 3595 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3596 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3597 3598 jj = aij->j; 3599 aa = aij->a; 3600 for (i=0; i<m; i++) { 3601 row = rstart + i; 3602 nz = ii[i+1] - ii[i]; 3603 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3604 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3605 jj += nz; aa += nz; 3606 } 3607 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3608 3609 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3610 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3611 3612 ierr = PetscFree(colsub);CHKERRQ(ierr); 3613 3614 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3615 if (call == MAT_INITIAL_MATRIX) { 3616 *newmat = M; 3617 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3618 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3619 3620 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3621 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3622 3623 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3624 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3625 3626 if (iscol_local) { 3627 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3628 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3629 } 3630 } 3631 PetscFunctionReturn(0); 3632 } 3633 3634 /* 3635 Not great since it makes two copies of the submatrix, first an SeqAIJ 3636 in local and then by concatenating the local matrices the end result. 3637 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3638 3639 Note: This requires a sequential iscol with all indices. 
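   A minimal calling sketch (for illustration; the sequential iscol would normally be produced
   by ISGetSeqIS_Private()/ISAllGather() as in MatCreateSubMatrix_MPIAIJ() above):

     IS  iscol_local;
     Mat newmat;
     ISAllGather(iscol,&iscol_local);
     MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,PETSC_DECIDE,MAT_INITIAL_MATRIX,&newmat);
     ISDestroy(&iscol_local);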
3640 */ 3641 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3642 { 3643 PetscErrorCode ierr; 3644 PetscMPIInt rank,size; 3645 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3646 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3647 Mat M,Mreuse; 3648 MatScalar *aa,*vwork; 3649 MPI_Comm comm; 3650 Mat_SeqAIJ *aij; 3651 PetscBool colflag,allcolumns=PETSC_FALSE; 3652 3653 PetscFunctionBegin; 3654 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3655 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3656 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3657 3658 /* Check for special case: each processor gets entire matrix columns */ 3659 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3660 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3661 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3662 3663 if (call == MAT_REUSE_MATRIX) { 3664 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3665 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3666 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3667 } else { 3668 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3669 } 3670 3671 /* 3672 m - number of local rows 3673 n - number of columns (same on all processors) 3674 rstart - first row in new global matrix generated 3675 */ 3676 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3677 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3678 if (call == MAT_INITIAL_MATRIX) { 3679 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3680 ii = aij->i; 3681 jj = aij->j; 3682 3683 /* 3684 Determine the number of non-zeros in the diagonal and off-diagonal 3685 portions of the matrix in order to do correct preallocation 3686 */ 3687 3688 /* first get start and end of "diagonal" columns */ 3689 if (csize == PETSC_DECIDE) { 3690 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3691 if (mglobal == n) { /* square matrix */ 3692 nlocal = m; 3693 } else { 3694 nlocal = n/size + ((n % size) > rank); 3695 } 3696 } else { 3697 nlocal = csize; 3698 } 3699 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3700 rstart = rend - nlocal; 3701 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3702 3703 /* next, compute all the lengths */ 3704 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3705 olens = dlens + m; 3706 for (i=0; i<m; i++) { 3707 jend = ii[i+1] - ii[i]; 3708 olen = 0; 3709 dlen = 0; 3710 for (j=0; j<jend; j++) { 3711 if (*jj < rstart || *jj >= rend) olen++; 3712 else dlen++; 3713 jj++; 3714 } 3715 olens[i] = olen; 3716 dlens[i] = dlen; 3717 } 3718 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3719 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3720 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3721 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3722 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3723 ierr = PetscFree(dlens);CHKERRQ(ierr); 3724 } else { 3725 PetscInt ml,nl; 3726 3727 M = *newmat; 3728 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3729 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3730 ierr = 
MatZeroEntries(M);CHKERRQ(ierr);
3731 /*
3732 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3733 rather than the slower MatSetValues().
3734 */
3735 M->was_assembled = PETSC_TRUE;
3736 M->assembled = PETSC_FALSE;
3737 }
3738 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3739 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3740 ii = aij->i;
3741 jj = aij->j;
3742 aa = aij->a;
3743 for (i=0; i<m; i++) {
3744 row = rstart + i;
3745 nz = ii[i+1] - ii[i];
3746 cwork = jj; jj += nz;
3747 vwork = aa; aa += nz;
3748 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3749 }
3750
3751 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3752 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3753 *newmat = M;
3754
3755 /* save submatrix used in processor for next request */
3756 if (call == MAT_INITIAL_MATRIX) {
3757 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3758 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3759 }
3760 PetscFunctionReturn(0);
3761 }
3762
3763 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3764 {
3765 PetscInt m,cstart, cend,j,nnz,i,d;
3766 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3767 const PetscInt *JJ;
3768 PetscScalar *values;
3769 PetscErrorCode ierr;
3770 PetscBool nooffprocentries;
3771
3772 PetscFunctionBegin;
3773 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3774
3775 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3776 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3777 m = B->rmap->n;
3778 cstart = B->cmap->rstart;
3779 cend = B->cmap->rend;
3780 rstart = B->rmap->rstart;
3781
3782 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3783
3784 #if defined(PETSC_USE_DEBUG)
3785 for (i=0; i<m; i++) {
3786 nnz = Ii[i+1]- Ii[i];
3787 JJ = J + Ii[i];
3788 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3789 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3790 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3791 }
3792 #endif
3793
3794 for (i=0; i<m; i++) {
3795 nnz = Ii[i+1]- Ii[i];
3796 JJ = J + Ii[i];
3797 nnz_max = PetscMax(nnz_max,nnz);
3798 d = 0;
3799 for (j=0; j<nnz; j++) {
3800 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3801 }
3802 d_nnz[i] = d;
3803 o_nnz[i] = nnz - d;
3804 }
3805 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3806 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3807
3808 if (v) values = (PetscScalar*)v;
3809 else {
3810 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3811 }
3812
3813 for (i=0; i<m; i++) {
3814 ii = i + rstart;
3815 nnz = Ii[i+1]- Ii[i];
3816 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3817 } 3818 nooffprocentries = B->nooffprocentries; 3819 B->nooffprocentries = PETSC_TRUE; 3820 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3821 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3822 B->nooffprocentries = nooffprocentries; 3823 3824 if (!v) { 3825 ierr = PetscFree(values);CHKERRQ(ierr); 3826 } 3827 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3828 PetscFunctionReturn(0); 3829 } 3830 3831 /*@ 3832 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3833 (the default parallel PETSc format). 3834 3835 Collective on MPI_Comm 3836 3837 Input Parameters: 3838 + B - the matrix 3839 . i - the indices into j for the start of each local row (starts with zero) 3840 . j - the column indices for each local row (starts with zero) 3841 - v - optional values in the matrix 3842 3843 Level: developer 3844 3845 Notes: 3846 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3847 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3848 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3849 3850 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3851 3852 The format which is used for the sparse matrix input, is equivalent to a 3853 row-major ordering.. i.e for the following matrix, the input data expected is 3854 as shown 3855 3856 $ 1 0 0 3857 $ 2 0 3 P0 3858 $ ------- 3859 $ 4 5 6 P1 3860 $ 3861 $ Process0 [P0]: rows_owned=[0,1] 3862 $ i = {0,1,3} [size = nrow+1 = 2+1] 3863 $ j = {0,0,2} [size = 3] 3864 $ v = {1,2,3} [size = 3] 3865 $ 3866 $ Process1 [P1]: rows_owned=[2] 3867 $ i = {0,3} [size = nrow+1 = 1+1] 3868 $ j = {0,1,2} [size = 3] 3869 $ v = {4,5,6} [size = 3] 3870 3871 .keywords: matrix, aij, compressed row, sparse, parallel 3872 3873 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3874 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3875 @*/ 3876 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3877 { 3878 PetscErrorCode ierr; 3879 3880 PetscFunctionBegin; 3881 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3882 PetscFunctionReturn(0); 3883 } 3884 3885 /*@C 3886 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3887 (the default parallel PETSc format). For good matrix assembly performance 3888 the user should preallocate the matrix storage by setting the parameters 3889 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3890 performance can be increased by more than a factor of 50. 3891 3892 Collective on MPI_Comm 3893 3894 Input Parameters: 3895 + B - the matrix 3896 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3897 (same value is used for all local rows) 3898 . d_nnz - array containing the number of nonzeros in the various rows of the 3899 DIAGONAL portion of the local submatrix (possibly different for each row) 3900 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3901 The size of this array is equal to the number of local rows, i.e 'm'. 
3902 For matrices that will be factored, you must leave room for (and set)
3903 the diagonal entry even if it is zero.
3904 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3905 submatrix (same value is used for all local rows).
3906 - o_nnz - array containing the number of nonzeros in the various rows of the
3907 OFF-DIAGONAL portion of the local submatrix (possibly different for
3908 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3909 structure. The size of this array is equal to the number
3910 of local rows, i.e., 'm'.
3911
3912 If the *_nnz parameter is given then the *_nz parameter is ignored.
3913
3914 The AIJ format (also called the Yale sparse matrix format or
3915 compressed row storage (CSR)) is fully compatible with standard Fortran 77
3916 storage. The stored row and column indices begin with zero.
3917 See Users-Manual: ch_mat for details.
3918
3919 The parallel matrix is partitioned such that the first m0 rows belong to
3920 process 0, the next m1 rows belong to process 1, the next m2 rows belong
3921 to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3922
3923 The DIAGONAL portion of the local submatrix of a processor can be defined
3924 as the submatrix which is obtained by extracting the part corresponding to
3925 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3926 first row that belongs to the processor, r2 is the last row belonging to
3927 this processor, and c1-c2 is the range of indices of the local part of a
3928 vector suitable for applying the matrix to. This is an mxn matrix. In the
3929 common case of a square matrix, the row and column ranges are the same and
3930 the DIAGONAL part is also square. The remaining portion of the local
3931 submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3932
3933 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3934
3935 You can call MatGetInfo() to get information on how effective the preallocation was;
3936 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3937 You can also run with the option -info and look for messages with the string
3938 malloc in them to see if additional memory allocation was needed.
3939
3940 Example usage:
3941
3942 Consider the following 8x8 matrix with 34 non-zero values, that is
3943 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3944 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3945 as follows:
3946
3947 .vb
3948 1 2 0 | 0 3 0 | 0 4
3949 Proc0 0 5 6 | 7 0 0 | 8 0
3950 9 0 10 | 11 0 0 | 12 0
3951 -------------------------------------
3952 13 0 14 | 15 16 17 | 0 0
3953 Proc1 0 18 0 | 19 20 21 | 0 0
3954 0 0 0 | 22 23 0 | 24 0
3955 -------------------------------------
3956 Proc2 25 26 27 | 0 0 28 | 29 0
3957 30 0 0 | 31 32 33 | 0 34
3958 .ve
3959
3960 This can be represented as a collection of submatrices as:
3961
3962 .vb
3963 A B C
3964 D E F
3965 G H I
3966 .ve
3967
3968 Where the submatrices A,B,C are owned by proc0, D,E,F are
3969 owned by proc1, G,H,I are owned by proc2.
3970
3971 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3972 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3973 The 'M','N' parameters are 8,8, and have the same values on all procs.
3974
3975 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3976 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3977 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3978 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3979 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3980 matrix, and [DF] as another SeqAIJ matrix.
3981
3982 When d_nz, o_nz parameters are specified, d_nz storage elements are
3983 allocated for every row of the local diagonal submatrix, and o_nz
3984 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3985 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3986 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3987 In this case, the values of d_nz,o_nz are:
3988 .vb
3989 proc0 : d_nz = 2, o_nz = 2
3990 proc1 : d_nz = 3, o_nz = 2
3991 proc2 : d_nz = 1, o_nz = 4
3992 .ve
3993 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3994 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3995 for proc2, i.e., we are using 12+15+10=37 storage locations to store
3996 34 values.
3997
3998 When d_nnz, o_nnz parameters are specified, the storage is specified
3999 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4000 In the above case the values for d_nnz,o_nnz are:
4001 .vb
4002 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4003 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4004 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4005 .ve
4006 Here the space allocated is the sum of all the above values, i.e., 34, and
4007 hence pre-allocation is perfect.
4008
4009 Level: intermediate
4010
4011 .keywords: matrix, aij, compressed row, sparse, parallel
4012
4013 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4014 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4015 @*/
4016 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4017 {
4018 PetscErrorCode ierr;
4019
4020 PetscFunctionBegin;
4021 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4022 PetscValidType(B,1);
4023 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4024 PetscFunctionReturn(0);
4025 }
4026
4027 /*@
4028 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4029 in standard CSR format.
4030
4031 Collective on MPI_Comm
4032
4033 Input Parameters:
4034 + comm - MPI communicator
4035 . m - number of local rows (Cannot be PETSC_DECIDE)
4036 . n - This value should be the same as the local size used in creating the
4037 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4038 calculated if N is given) For square matrices n is almost always m.
4039 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4040 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4041 . i - row indices
4042 . j - column indices
4043 - a - matrix values
4044
4045 Output Parameter:
4046 . mat - the matrix
4047
4048 Level: intermediate
4049
4050 Notes:
4051 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4052 thus you CANNOT change the matrix entries by changing the values of a[] after you have
4053 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4054
4055 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
4056 4057 The format which is used for the sparse matrix input, is equivalent to a 4058 row-major ordering.. i.e for the following matrix, the input data expected is 4059 as shown 4060 4061 $ 1 0 0 4062 $ 2 0 3 P0 4063 $ ------- 4064 $ 4 5 6 P1 4065 $ 4066 $ Process0 [P0]: rows_owned=[0,1] 4067 $ i = {0,1,3} [size = nrow+1 = 2+1] 4068 $ j = {0,0,2} [size = 3] 4069 $ v = {1,2,3} [size = 3] 4070 $ 4071 $ Process1 [P1]: rows_owned=[2] 4072 $ i = {0,3} [size = nrow+1 = 1+1] 4073 $ j = {0,1,2} [size = 3] 4074 $ v = {4,5,6} [size = 3] 4075 4076 .keywords: matrix, aij, compressed row, sparse, parallel 4077 4078 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4079 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4080 @*/ 4081 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4082 { 4083 PetscErrorCode ierr; 4084 4085 PetscFunctionBegin; 4086 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4087 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4088 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4089 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4090 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4091 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4092 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4093 PetscFunctionReturn(0); 4094 } 4095 4096 /*@C 4097 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4098 (the default parallel PETSc format). For good matrix assembly performance 4099 the user should preallocate the matrix storage by setting the parameters 4100 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4101 performance can be increased by more than a factor of 50. 4102 4103 Collective on MPI_Comm 4104 4105 Input Parameters: 4106 + comm - MPI communicator 4107 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4108 This value should be the same as the local size used in creating the 4109 y vector for the matrix-vector product y = Ax. 4110 . n - This value should be the same as the local size used in creating the 4111 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4112 calculated if N is given) For square matrices n is almost always m. 4113 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4114 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4115 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4116 (same value is used for all local rows) 4117 . d_nnz - array containing the number of nonzeros in the various rows of the 4118 DIAGONAL portion of the local submatrix (possibly different for each row) 4119 or NULL, if d_nz is used to specify the nonzero structure. 4120 The size of this array is equal to the number of local rows, i.e 'm'. 4121 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4122 submatrix (same value is used for all local rows). 4123 - o_nnz - array containing the number of nonzeros in the various rows of the 4124 OFF-DIAGONAL portion of the local submatrix (possibly different for 4125 each row) or NULL, if o_nz is used to specify the nonzero 4126 structure. The size of this array is equal to the number 4127 of local rows, i.e 'm'. 
4128
4129 Output Parameter:
4130 . A - the matrix
4131
4132 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4133 MatXXXXSetPreallocation() paradigm instead of this routine directly.
4134 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4135
4136 Notes:
4137 If the *_nnz parameter is given then the *_nz parameter is ignored.
4138
4139 m,n,M,N parameters specify the size of the matrix, and its partitioning across
4140 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4141 storage requirements for this matrix.
4142
4143 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4144 processor then it must be used on all processors that share the object for
4145 that argument.
4146
4147 The user MUST specify either the local or global matrix dimensions
4148 (possibly both).
4149
4150 The parallel matrix is partitioned across processors such that the
4151 first m0 rows belong to process 0, the next m1 rows belong to
4152 process 1, the next m2 rows belong to process 2, etc., where
4153 m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4154 values corresponding to an [m x N] submatrix.
4155
4156 The columns are logically partitioned with the n0 columns belonging
4157 to 0th partition, the next n1 columns belonging to the next
4158 partition, etc., where n0,n1,n2... are the input parameter 'n'.
4159
4160 The DIAGONAL portion of the local submatrix on any given processor
4161 is the submatrix corresponding to the rows and columns m,n
4162 corresponding to the given processor, i.e., the diagonal matrix on
4163 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4164 etc. The remaining portion of the local submatrix [m x (N-n)]
4165 constitutes the OFF-DIAGONAL portion. The example below better
4166 illustrates this concept.
4167
4168 For a square global matrix we define each processor's diagonal portion
4169 to be its local rows and the corresponding columns (a square submatrix);
4170 each processor's off-diagonal portion encompasses the remainder of the
4171 local matrix (a rectangular submatrix).
4172
4173 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4174
4175 When calling this routine with a single process communicator, a matrix of
4176 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4177 type of communicator, use the construction mechanism
4178 .vb
4179 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4180 .ve
4181
4187 By default, this format uses inodes (identical nodes) when possible.
4188 We search for consecutive rows with the same nonzero structure, thereby
4189 reusing matrix information to achieve increased efficiency.
4190
4191 Options Database Keys:
4192 + -mat_no_inode - Do not use inodes
4193 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4194 - -mat_aij_oneindex - Internally use indexing starting at 1
4195 rather than 0. Note that when calling MatSetValues(),
4196 the user still MUST index entries starting at 0!
4197
4198
4199 Example usage:
4200
4201 Consider the following 8x8 matrix with 34 non-zero values, that is
4202 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4203 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
4204 as follows:
4205
4206 .vb
4207 1 2 0 | 0 3 0 | 0 4
4208 Proc0 0 5 6 | 7 0 0 | 8 0
4209 9 0 10 | 11 0 0 | 12 0
4210 -------------------------------------
4211 13 0 14 | 15 16 17 | 0 0
4212 Proc1 0 18 0 | 19 20 21 | 0 0
4213 0 0 0 | 22 23 0 | 24 0
4214 -------------------------------------
4215 Proc2 25 26 27 | 0 0 28 | 29 0
4216 30 0 0 | 31 32 33 | 0 34
4217 .ve
4218
4219 This can be represented as a collection of submatrices as:
4220
4221 .vb
4222 A B C
4223 D E F
4224 G H I
4225 .ve
4226
4227 Where the submatrices A,B,C are owned by proc0, D,E,F are
4228 owned by proc1, G,H,I are owned by proc2.
4229
4230 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4231 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4232 The 'M','N' parameters are 8,8, and have the same values on all procs.
4233
4234 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4235 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4236 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4237 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4238 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4239 matrix, and [DF] as another SeqAIJ matrix.
4240
4241 When d_nz, o_nz parameters are specified, d_nz storage elements are
4242 allocated for every row of the local diagonal submatrix, and o_nz
4243 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4244 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4245 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4246 In this case, the values of d_nz,o_nz are
4247 .vb
4248 proc0 : d_nz = 2, o_nz = 2
4249 proc1 : d_nz = 3, o_nz = 2
4250 proc2 : d_nz = 1, o_nz = 4
4251 .ve
4252 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4253 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4254 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4255 34 values.
4256
4257 When d_nnz, o_nnz parameters are specified, the storage is specified
4258 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4259 In the above case the values for d_nnz,o_nnz are
4260 .vb
4261 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4262 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4263 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4264 .ve
4265 Here the space allocated is the sum of all the above values, i.e., 34, and
4266 hence pre-allocation is perfect. 
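   For example (an illustrative sketch; each process passes its own m, n, d_nnz and o_nnz),
   process 0 of the layout above could create the matrix with
.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   followed by the usual MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd() calls.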
4267 4268 Level: intermediate 4269 4270 .keywords: matrix, aij, compressed row, sparse, parallel 4271 4272 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4273 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4274 @*/ 4275 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4276 { 4277 PetscErrorCode ierr; 4278 PetscMPIInt size; 4279 4280 PetscFunctionBegin; 4281 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4282 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4283 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4284 if (size > 1) { 4285 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4286 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4287 } else { 4288 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4289 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4290 } 4291 PetscFunctionReturn(0); 4292 } 4293 4294 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4295 { 4296 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4297 PetscBool flg; 4298 PetscErrorCode ierr; 4299 4300 PetscFunctionBegin; 4301 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4302 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4303 if (Ad) *Ad = a->A; 4304 if (Ao) *Ao = a->B; 4305 if (colmap) *colmap = a->garray; 4306 PetscFunctionReturn(0); 4307 } 4308 4309 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4310 { 4311 PetscErrorCode ierr; 4312 PetscInt m,N,i,rstart,nnz,Ii; 4313 PetscInt *indx; 4314 PetscScalar *values; 4315 4316 PetscFunctionBegin; 4317 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4318 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4319 PetscInt *dnz,*onz,sum,bs,cbs; 4320 4321 if (n == PETSC_DECIDE) { 4322 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4323 } 4324 /* Check sum(n) = N */ 4325 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4326 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4327 4328 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4329 rstart -= m; 4330 4331 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4332 for (i=0; i<m; i++) { 4333 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4334 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4335 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4336 } 4337 4338 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4339 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4340 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4341 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4342 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4343 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4344 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4345 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4346 } 4347 4348 /* numeric phase */ 4349 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4350 for (i=0; i<m; i++) { 4351 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4352 Ii = i + rstart; 4353 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
4354 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4355 } 4356 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4357 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4358 PetscFunctionReturn(0); 4359 } 4360 4361 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4362 { 4363 PetscErrorCode ierr; 4364 PetscMPIInt rank; 4365 PetscInt m,N,i,rstart,nnz; 4366 size_t len; 4367 const PetscInt *indx; 4368 PetscViewer out; 4369 char *name; 4370 Mat B; 4371 const PetscScalar *values; 4372 4373 PetscFunctionBegin; 4374 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4375 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4376 /* Should this be the type of the diagonal block of A? */ 4377 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4378 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4379 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4380 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4381 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4382 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4383 for (i=0; i<m; i++) { 4384 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4385 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4386 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4387 } 4388 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4389 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4390 4391 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4392 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4393 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4394 sprintf(name,"%s.%d",outfile,rank); 4395 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4396 ierr = PetscFree(name);CHKERRQ(ierr); 4397 ierr = MatView(B,out);CHKERRQ(ierr); 4398 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4399 ierr = MatDestroy(&B);CHKERRQ(ierr); 4400 PetscFunctionReturn(0); 4401 } 4402 4403 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4404 { 4405 PetscErrorCode ierr; 4406 Mat_Merge_SeqsToMPI *merge; 4407 PetscContainer container; 4408 4409 PetscFunctionBegin; 4410 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4411 if (container) { 4412 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4413 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4414 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4415 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4416 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4417 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4418 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4419 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4420 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4421 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4422 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4423 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4424 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4425 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4426 ierr = PetscFree(merge);CHKERRQ(ierr); 4427 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4428 } 4429 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4430 PetscFunctionReturn(0); 4431 } 4432 4433 #include <../src/mat/utils/freespace.h> 4434 #include <petscbt.h> 4435 4436 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4437 { 4438 PetscErrorCode ierr; 4439 MPI_Comm comm; 4440 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4441 PetscMPIInt 
size,rank,taga,*len_s; 4442 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4443 PetscInt proc,m; 4444 PetscInt **buf_ri,**buf_rj; 4445 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4446 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4447 MPI_Request *s_waits,*r_waits; 4448 MPI_Status *status; 4449 MatScalar *aa=a->a; 4450 MatScalar **abuf_r,*ba_i; 4451 Mat_Merge_SeqsToMPI *merge; 4452 PetscContainer container; 4453 4454 PetscFunctionBegin; 4455 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4456 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4457 4458 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4459 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4460 4461 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4462 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4463 4464 bi = merge->bi; 4465 bj = merge->bj; 4466 buf_ri = merge->buf_ri; 4467 buf_rj = merge->buf_rj; 4468 4469 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4470 owners = merge->rowmap->range; 4471 len_s = merge->len_s; 4472 4473 /* send and recv matrix values */ 4474 /*-----------------------------*/ 4475 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4476 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4477 4478 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4479 for (proc=0,k=0; proc<size; proc++) { 4480 if (!len_s[proc]) continue; 4481 i = owners[proc]; 4482 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4483 k++; 4484 } 4485 4486 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4487 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4488 ierr = PetscFree(status);CHKERRQ(ierr); 4489 4490 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4491 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4492 4493 /* insert mat values of mpimat */ 4494 /*----------------------------*/ 4495 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4496 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4497 4498 for (k=0; k<merge->nrecv; k++) { 4499 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4500 nrows = *(buf_ri_k[k]); 4501 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4502 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4503 } 4504 4505 /* set values of ba */ 4506 m = merge->rowmap->n; 4507 for (i=0; i<m; i++) { 4508 arow = owners[rank] + i; 4509 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4510 bnzi = bi[i+1] - bi[i]; 4511 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4512 4513 /* add local non-zero vals of this proc's seqmat into ba */ 4514 anzi = ai[arow+1] - ai[arow]; 4515 aj = a->j + ai[arow]; 4516 aa = a->a + ai[arow]; 4517 nextaj = 0; 4518 for (j=0; nextaj<anzi; j++) { 4519 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4520 ba_i[j] += aa[nextaj++]; 4521 } 4522 } 4523 4524 /* add received vals into ba */ 4525 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4526 /* i-th row */ 4527 if (i == *nextrow[k]) { 4528 anzi = *(nextai[k]+1) - *nextai[k]; 4529 aj = buf_rj[k] + *(nextai[k]); 4530 aa = abuf_r[k] + *(nextai[k]); 4531 nextaj = 0; 4532 for (j=0; nextaj<anzi; j++) { 4533 if (*(bj_i + j) == aj[nextaj]) { /* bcol 
== acol */ 4534 ba_i[j] += aa[nextaj++]; 4535 } 4536 } 4537 nextrow[k]++; nextai[k]++; 4538 } 4539 } 4540 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4541 } 4542 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4543 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4544 4545 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4546 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4547 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4548 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4549 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4550 PetscFunctionReturn(0); 4551 } 4552 4553 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4554 { 4555 PetscErrorCode ierr; 4556 Mat B_mpi; 4557 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4558 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4559 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4560 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4561 PetscInt len,proc,*dnz,*onz,bs,cbs; 4562 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4563 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4564 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4565 MPI_Status *status; 4566 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4567 PetscBT lnkbt; 4568 Mat_Merge_SeqsToMPI *merge; 4569 PetscContainer container; 4570 4571 PetscFunctionBegin; 4572 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4573 4574 /* make sure it is a PETSc comm */ 4575 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4576 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4577 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4578 4579 ierr = PetscNew(&merge);CHKERRQ(ierr); 4580 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4581 4582 /* determine row ownership */ 4583 /*---------------------------------------------------------*/ 4584 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4585 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4586 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4587 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4588 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4589 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4590 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4591 4592 m = merge->rowmap->n; 4593 owners = merge->rowmap->range; 4594 4595 /* determine the number of messages to send, their lengths */ 4596 /*---------------------------------------------------------*/ 4597 len_s = merge->len_s; 4598 4599 len = 0; /* length of buf_si[] */ 4600 merge->nsend = 0; 4601 for (proc=0; proc<size; proc++) { 4602 len_si[proc] = 0; 4603 if (proc == rank) { 4604 len_s[proc] = 0; 4605 } else { 4606 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4607 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4608 } 4609 if (len_s[proc]) { 4610 merge->nsend++; 4611 nrows = 0; 4612 for (i=owners[proc]; i<owners[proc+1]; i++) { 4613 if (ai[i+1] > ai[i]) nrows++; 4614 } 4615 len_si[proc] = 2*(nrows+1); 4616 len += len_si[proc]; 4617 } 4618 } 4619 4620 /* determine the number and length of messages to receive for ij-structure */ 4621 /*-------------------------------------------------------------------------*/ 4622 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4623 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4624 4625 /* post the Irecv of j-structure */ 4626 /*-------------------------------*/ 4627 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4628 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4629 4630 /* post the Isend of j-structure */ 4631 /*--------------------------------*/ 4632 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4633 4634 for (proc=0, k=0; proc<size; proc++) { 4635 if (!len_s[proc]) continue; 4636 i = owners[proc]; 4637 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4638 k++; 4639 } 4640 4641 /* receives and sends of j-structure are complete */ 4642 /*------------------------------------------------*/ 4643 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4644 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4645 4646 /* send and recv i-structure */ 4647 /*---------------------------*/ 4648 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4649 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4650 4651 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4652 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4653 for (proc=0,k=0; proc<size; proc++) { 4654 if (!len_s[proc]) continue; 4655 /* form outgoing message for i-structure: 4656 buf_si[0]: nrows to be sent 4657 [1:nrows]: row index (global) 4658 [nrows+1:2*nrows+1]: i-structure index 4659 */ 4660 /*-------------------------------------------*/ 4661 nrows = len_si[proc]/2 - 1; 4662 buf_si_i = buf_si + nrows+1; 4663 buf_si[0] = nrows; 4664 buf_si_i[0] = 0; 4665 nrows = 0; 4666 for (i=owners[proc]; i<owners[proc+1]; i++) { 4667 anzi = ai[i+1] - ai[i]; 4668 if (anzi) { 4669 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4670 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4671 nrows++; 4672 } 4673 } 4674 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4675 k++; 4676 buf_si += len_si[proc]; 4677 } 4678 4679 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4680 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4681 4682 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4683 for (i=0; i<merge->nrecv; i++) { 4684 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4685 } 4686 4687 ierr = PetscFree(len_si);CHKERRQ(ierr); 4688 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4689 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4690 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4691 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4692 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4693 ierr = PetscFree(status);CHKERRQ(ierr); 4694 4695 /* compute a local seq matrix in each processor */ 4696 /*----------------------------------------------*/ 4697 /* allocate bi array and free space for accumulating nonzero column info */ 4698 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4699 bi[0] = 0; 4700 4701 /* create and initialize a linked list */ 4702 nlnk = N+1; 4703 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4704 4705 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4706 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4707 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4708 4709 current_space = free_space; 4710 4711 /* determine symbolic info for each local row */ 4712 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4713 4714 for (k=0; k<merge->nrecv; k++) { 4715 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4716 nrows = *buf_ri_k[k]; 4717 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4718 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4719 } 4720 4721 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4722 len = 0; 4723 for (i=0; i<m; i++) { 4724 bnzi = 0; 4725 /* add local non-zero cols of this proc's seqmat into lnk */ 4726 arow = owners[rank] + i; 4727 anzi = ai[arow+1] - ai[arow]; 4728 aj = a->j + ai[arow]; 4729 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4730 bnzi += nlnk; 4731 /* add received col data into lnk */ 4732 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4733 if (i == *nextrow[k]) { /* i-th row */ 4734 anzi = *(nextai[k]+1) - *nextai[k]; 4735 aj = buf_rj[k] + *nextai[k]; 4736 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4737 bnzi += nlnk; 4738 nextrow[k]++; nextai[k]++; 4739 } 4740 } 4741 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4742 4743 /* if free space is not available, make more free space */ 4744 if (current_space->local_remaining<bnzi) { 4745 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4746 nspacedouble++; 4747 } 4748 /* copy data into free space, then initialize lnk */ 4749 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4750 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4751 4752 current_space->array += bnzi; 4753 current_space->local_used += bnzi; 4754 current_space->local_remaining -= bnzi; 4755 4756 bi[i+1] = bi[i] + bnzi; 4757 } 4758 4759 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4760 4761 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4762 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4763 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4764 4765 /* create symbolic parallel matrix B_mpi */ 4766 /*---------------------------------------*/ 4767 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4768 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4769 if (n==PETSC_DECIDE) { 4770 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4771 } else { 4772 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4773 } 4774 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4775 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4776 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4777 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4778 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4779 4780 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4781 B_mpi->assembled = PETSC_FALSE; 4782 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4783 merge->bi = bi; 4784 merge->bj = bj; 4785 merge->buf_ri = buf_ri; 4786 merge->buf_rj = buf_rj; 4787 merge->coi = NULL; 4788 merge->coj = NULL; 4789 merge->owners_co = NULL; 4790 4791 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4792 4793 /* attach the 
supporting struct to B_mpi for reuse */ 4794 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4795 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4796 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4797 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4798 *mpimat = B_mpi; 4799 4800 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4801 PetscFunctionReturn(0); 4802 } 4803 4804 /*@C 4805 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4806 matrices from each processor 4807 4808 Collective on MPI_Comm 4809 4810 Input Parameters: 4811 + comm - the communicator the parallel matrix will live on 4812 . seqmat - the input sequential matrix on each process 4813 . m - number of local rows (or PETSC_DECIDE) 4814 . n - number of local columns (or PETSC_DECIDE) 4815 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4816 4817 Output Parameter: 4818 . mpimat - the parallel matrix generated 4819 4820 Level: advanced 4821 4822 Notes: 4823 The dimensions of the sequential matrix in each processor MUST be the same. 4824 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4825 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4826 @*/ 4827 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4828 { 4829 PetscErrorCode ierr; 4830 PetscMPIInt size; 4831 4832 PetscFunctionBegin; 4833 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4834 if (size == 1) { 4835 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4836 if (scall == MAT_INITIAL_MATRIX) { 4837 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4838 } else { 4839 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4840 } 4841 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4842 PetscFunctionReturn(0); 4843 } 4844 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4845 if (scall == MAT_INITIAL_MATRIX) { 4846 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4847 } 4848 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4849 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4850 PetscFunctionReturn(0); 4851 } 4852 4853 /*@ 4854 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4855 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4856 with MatGetSize() 4857 4858 Not Collective 4859 4860 Input Parameters: 4861 + A - the matrix 4862 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4863 4864 Output Parameter: 4865 . 
A_loc - the local sequential matrix generated 4866 4867 Level: developer 4868 4869 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4870 4871 @*/ 4872 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4873 { 4874 PetscErrorCode ierr; 4875 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4876 Mat_SeqAIJ *mat,*a,*b; 4877 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4878 MatScalar *aa,*ba,*cam; 4879 PetscScalar *ca; 4880 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4881 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4882 PetscBool match; 4883 MPI_Comm comm; 4884 PetscMPIInt size; 4885 4886 PetscFunctionBegin; 4887 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4888 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4889 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4890 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4891 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4892 4893 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4894 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4895 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4896 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4897 aa = a->a; ba = b->a; 4898 if (scall == MAT_INITIAL_MATRIX) { 4899 if (size == 1) { 4900 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4901 PetscFunctionReturn(0); 4902 } 4903 4904 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4905 ci[0] = 0; 4906 for (i=0; i<am; i++) { 4907 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4908 } 4909 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4910 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4911 k = 0; 4912 for (i=0; i<am; i++) { 4913 ncols_o = bi[i+1] - bi[i]; 4914 ncols_d = ai[i+1] - ai[i]; 4915 /* off-diagonal portion of A */ 4916 for (jo=0; jo<ncols_o; jo++) { 4917 col = cmap[*bj]; 4918 if (col >= cstart) break; 4919 cj[k] = col; bj++; 4920 ca[k++] = *ba++; 4921 } 4922 /* diagonal portion of A */ 4923 for (j=0; j<ncols_d; j++) { 4924 cj[k] = cstart + *aj++; 4925 ca[k++] = *aa++; 4926 } 4927 /* off-diagonal portion of A */ 4928 for (j=jo; j<ncols_o; j++) { 4929 cj[k] = cmap[*bj++]; 4930 ca[k++] = *ba++; 4931 } 4932 } 4933 /* put together the new matrix */ 4934 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4935 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4936 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4937 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4938 mat->free_a = PETSC_TRUE; 4939 mat->free_ij = PETSC_TRUE; 4940 mat->nonew = 0; 4941 } else if (scall == MAT_REUSE_MATRIX) { 4942 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4943 ci = mat->i; cj = mat->j; cam = mat->a; 4944 for (i=0; i<am; i++) { 4945 /* off-diagonal portion of A */ 4946 ncols_o = bi[i+1] - bi[i]; 4947 for (jo=0; jo<ncols_o; jo++) { 4948 col = cmap[*bj]; 4949 if (col >= cstart) break; 4950 *cam++ = *ba++; bj++; 4951 } 4952 /* diagonal portion of A */ 4953 ncols_d = ai[i+1] - ai[i]; 4954 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4955 /* off-diagonal portion of A */ 4956 for (j=jo; j<ncols_o; j++) { 4957 *cam++ = *ba++; bj++; 4958 } 4959 } 4960 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4961 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4962 PetscFunctionReturn(0); 4963 } 4964 4965 /*@C 4966 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4967 4968 Not Collective 4969 4970 Input Parameters: 4971 + A - the matrix 4972 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4973 - row, col - index sets of rows and columns to extract (or NULL) 4974 4975 Output Parameter: 4976 . A_loc - the local sequential matrix generated 4977 4978 Level: developer 4979 4980 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4981 4982 @*/ 4983 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4984 { 4985 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4986 PetscErrorCode ierr; 4987 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4988 IS isrowa,iscola; 4989 Mat *aloc; 4990 PetscBool match; 4991 4992 PetscFunctionBegin; 4993 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4994 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4995 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4996 if (!row) { 4997 start = A->rmap->rstart; end = A->rmap->rend; 4998 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4999 } else { 5000 isrowa = *row; 5001 } 5002 if (!col) { 5003 start = A->cmap->rstart; 5004 cmap = a->garray; 5005 nzA = a->A->cmap->n; 5006 nzB = a->B->cmap->n; 5007 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5008 ncols = 0; 5009 for (i=0; i<nzB; i++) { 5010 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5011 else break; 5012 } 5013 imark = i; 5014 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5015 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5016 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5017 } else { 5018 iscola = *col; 5019 } 5020 if (scall != MAT_INITIAL_MATRIX) { 5021 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5022 aloc[0] = *A_loc; 5023 } 5024 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5025 *A_loc = aloc[0]; 5026 ierr = PetscFree(aloc);CHKERRQ(ierr); 5027 if (!row) { 5028 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5029 } 5030 if (!col) { 5031 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5032 } 5033 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5034 PetscFunctionReturn(0); 5035 } 5036 5037 /*@C 5038 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5039 5040 Collective on Mat 5041 5042 Input Parameters: 5043 + A,B - the matrices in mpiaij format 5044 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5045 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5046 5047 Output Parameter: 5048 + rowb, colb - index sets of rows and columns of B to extract 5049 - B_seq - the sequential matrix generated 5050 5051 Level: developer 5052 5053 @*/ 5054 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5055 { 5056 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5057 PetscErrorCode ierr; 5058 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5059 IS isrowb,iscolb; 5060 Mat *bseq=NULL; 5061 5062 PetscFunctionBegin; 5063 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5064 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5065 } 5066 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5067 5068 if (scall == MAT_INITIAL_MATRIX) { 5069 start = A->cmap->rstart; 5070 cmap = a->garray; 5071 nzA = a->A->cmap->n; 5072 nzB = a->B->cmap->n; 5073 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5074 ncols = 0; 5075 for (i=0; i<nzB; i++) { /* row < local row index */ 5076 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5077 else break; 5078 } 5079 imark = i; 5080 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5081 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5082 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5083 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5084 } else { 5085 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5086 isrowb = *rowb; iscolb = *colb; 5087 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5088 bseq[0] = *B_seq; 5089 } 5090 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5091 *B_seq = bseq[0]; 5092 ierr = PetscFree(bseq);CHKERRQ(ierr); 5093 if (!rowb) { 5094 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5095 } else { 5096 *rowb = isrowb; 5097 } 5098 if (!colb) { 5099 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5100 } else { 5101 *colb = iscolb; 5102 } 5103 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5104 PetscFunctionReturn(0); 5105 } 5106 5107 /* 5108 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5109 of the OFF-DIAGONAL portion of local A 5110 5111 Collective on Mat 5112 5113 Input Parameters: 5114 + A,B - the matrices in mpiaij format 5115 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5116 5117 Output Parameter: 5118 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5119 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5120 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5121 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5122 5123 Level: developer 5124 5125 */ 5126 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5127 { 5128 VecScatter_MPI_General *gen_to,*gen_from; 5129 PetscErrorCode ierr; 5130 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5131 Mat_SeqAIJ *b_oth; 5132 VecScatter ctx; 5133 MPI_Comm comm; 5134 PetscMPIInt *rprocs,*sprocs,tag,rank; 5135 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5136 PetscInt *rvalues,*svalues; 5137 MatScalar *b_otha,*bufa,*bufA; 5138 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5139 MPI_Request *rwaits = NULL,*swaits = NULL; 5140 MPI_Status *sstatus,rstatus; 5141 PetscMPIInt jj,size; 5142 PetscInt *cols,sbs,rbs; 5143 PetscScalar *vals; 5144 5145 PetscFunctionBegin; 5146 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5147 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5148 5149 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5150 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5151 } 5152 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5153 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5154 5155 if (size == 1) { 5156 startsj_s = NULL; 5157 bufa_ptr = NULL; 5158 *B_oth = NULL; 5159 PetscFunctionReturn(0); 5160 } 5161 5162 if (!a->Mvctx_mpi1) { /* create a->Mvctx_mpi1 to be used for Mat-Mat ops */ 5163 a->Mvctx_mpi1_flg = PETSC_TRUE; 5164 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5165 } 5166 ctx = a->Mvctx_mpi1; 5167 tag = ((PetscObject)ctx)->tag; 5168 5169 gen_to = (VecScatter_MPI_General*)ctx->todata; 5170 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5171 nrecvs = gen_from->n; 5172 nsends = gen_to->n; 5173 5174 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5175 srow = gen_to->indices; /* local row index to be sent */ 5176 sstarts = gen_to->starts; 5177 sprocs = gen_to->procs; 5178 sstatus = gen_to->sstatus; 5179 sbs = gen_to->bs; 5180 rstarts = gen_from->starts; 5181 rprocs = gen_from->procs; 5182 rbs = gen_from->bs; 5183 5184 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5185 if (scall == MAT_INITIAL_MATRIX) { 5186 /* i-array */ 5187 /*---------*/ 5188 /* post receives */ 5189 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5190 for (i=0; i<nrecvs; i++) { 5191 rowlen = rvalues + rstarts[i]*rbs; 5192 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5193 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5194 } 5195 5196 /* pack the outgoing message */ 5197 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5198 5199 sstartsj[0] = 0; 5200 rstartsj[0] = 0; 5201 len = 0; /* total length of j or a array to be sent */ 5202 k = 0; 5203 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5204 for (i=0; i<nsends; i++) { 5205 rowlen = svalues + sstarts[i]*sbs; 5206 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5207 for (j=0; j<nrows; j++) { 5208 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5209 for (l=0; l<sbs; l++) { 5210 ierr = 
MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5211 5212 rowlen[j*sbs+l] = ncols; 5213 5214 len += ncols; 5215 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5216 } 5217 k++; 5218 } 5219 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5220 5221 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5222 } 5223 /* recvs and sends of i-array are completed */ 5224 i = nrecvs; 5225 while (i--) { 5226 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5227 } 5228 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5229 ierr = PetscFree(svalues);CHKERRQ(ierr); 5230 5231 /* allocate buffers for sending j and a arrays */ 5232 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5233 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5234 5235 /* create i-array of B_oth */ 5236 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5237 5238 b_othi[0] = 0; 5239 len = 0; /* total length of j or a array to be received */ 5240 k = 0; 5241 for (i=0; i<nrecvs; i++) { 5242 rowlen = rvalues + rstarts[i]*rbs; 5243 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5244 for (j=0; j<nrows; j++) { 5245 b_othi[k+1] = b_othi[k] + rowlen[j]; 5246 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5247 k++; 5248 } 5249 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5250 } 5251 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5252 5253 /* allocate space for j and a arrrays of B_oth */ 5254 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5255 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5256 5257 /* j-array */ 5258 /*---------*/ 5259 /* post receives of j-array */ 5260 for (i=0; i<nrecvs; i++) { 5261 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5262 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5263 } 5264 5265 /* pack the outgoing message j-array */ 5266 k = 0; 5267 for (i=0; i<nsends; i++) { 5268 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5269 bufJ = bufj+sstartsj[i]; 5270 for (j=0; j<nrows; j++) { 5271 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5272 for (ll=0; ll<sbs; ll++) { 5273 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5274 for (l=0; l<ncols; l++) { 5275 *bufJ++ = cols[l]; 5276 } 5277 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5278 } 5279 } 5280 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5281 } 5282 5283 /* recvs and sends of j-array are completed */ 5284 i = nrecvs; 5285 while (i--) { 5286 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5287 } 5288 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5289 } else if (scall == MAT_REUSE_MATRIX) { 5290 sstartsj = *startsj_s; 5291 rstartsj = *startsj_r; 5292 bufa = *bufa_ptr; 5293 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5294 b_otha = b_oth->a; 5295 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5296 5297 /* a-array */ 5298 /*---------*/ 5299 /* post receives of a-array */ 5300 for (i=0; i<nrecvs; i++) { 5301 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5302 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5303 } 5304 5305 /* pack the outgoing message a-array */ 5306 k = 
0; 5307 for (i=0; i<nsends; i++) { 5308 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5309 bufA = bufa+sstartsj[i]; 5310 for (j=0; j<nrows; j++) { 5311 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5312 for (ll=0; ll<sbs; ll++) { 5313 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5314 for (l=0; l<ncols; l++) { 5315 *bufA++ = vals[l]; 5316 } 5317 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5318 } 5319 } 5320 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5321 } 5322 /* recvs and sends of a-array are completed */ 5323 i = nrecvs; 5324 while (i--) { 5325 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5326 } 5327 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5328 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5329 5330 if (scall == MAT_INITIAL_MATRIX) { 5331 /* put together the new matrix */ 5332 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5333 5334 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5335 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5336 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5337 b_oth->free_a = PETSC_TRUE; 5338 b_oth->free_ij = PETSC_TRUE; 5339 b_oth->nonew = 0; 5340 5341 ierr = PetscFree(bufj);CHKERRQ(ierr); 5342 if (!startsj_s || !bufa_ptr) { 5343 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5344 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5345 } else { 5346 *startsj_s = sstartsj; 5347 *startsj_r = rstartsj; 5348 *bufa_ptr = bufa; 5349 } 5350 } 5351 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5352 PetscFunctionReturn(0); 5353 } 5354 5355 /*@C 5356 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5357 5358 Not Collective 5359 5360 Input Parameters: 5361 . A - The matrix in mpiaij format 5362 5363 Output Parameter: 5364 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5365 . 
colmap - A map from global column index to local index into lvec 5366 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5367 5368 Level: developer 5369 5370 @*/ 5371 #if defined(PETSC_USE_CTABLE) 5372 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5373 #else 5374 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5375 #endif 5376 { 5377 Mat_MPIAIJ *a; 5378 5379 PetscFunctionBegin; 5380 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5381 PetscValidPointer(lvec, 2); 5382 PetscValidPointer(colmap, 3); 5383 PetscValidPointer(multScatter, 4); 5384 a = (Mat_MPIAIJ*) A->data; 5385 if (lvec) *lvec = a->lvec; 5386 if (colmap) *colmap = a->colmap; 5387 if (multScatter) *multScatter = a->Mvctx; 5388 PetscFunctionReturn(0); 5389 } 5390 5391 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5392 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5393 #if defined(PETSC_HAVE_MKL_SPARSE) 5394 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5395 #endif 5396 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5397 #if defined(PETSC_HAVE_ELEMENTAL) 5398 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5399 #endif 5400 #if defined(PETSC_HAVE_HYPRE) 5401 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5402 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5403 #endif 5404 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*); 5405 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5406 5407 /* 5408 Computes (B'*A')' since computing B*A directly is untenable 5409 5410 n p p 5411 ( ) ( ) ( ) 5412 m ( A ) * n ( B ) = m ( C ) 5413 ( ) ( ) ( ) 5414 5415 */ 5416 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5417 { 5418 PetscErrorCode ierr; 5419 Mat At,Bt,Ct; 5420 5421 PetscFunctionBegin; 5422 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5423 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5424 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5425 ierr = MatDestroy(&At);CHKERRQ(ierr); 5426 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5427 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5428 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5429 PetscFunctionReturn(0); 5430 } 5431 5432 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5433 { 5434 PetscErrorCode ierr; 5435 PetscInt m=A->rmap->n,n=B->cmap->n; 5436 Mat Cmat; 5437 5438 PetscFunctionBegin; 5439 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5440 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5441 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5442 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5443 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5444 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5445 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5446 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5447 5448 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5449 5450 *C = Cmat; 5451 PetscFunctionReturn(0); 5452 } 5453 5454 /* 
----------------------------------------------------------------*/ 5455 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5456 { 5457 PetscErrorCode ierr; 5458 5459 PetscFunctionBegin; 5460 if (scall == MAT_INITIAL_MATRIX) { 5461 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5462 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5463 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5464 } 5465 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5466 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5467 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5468 PetscFunctionReturn(0); 5469 } 5470 5471 /*MC 5472 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5473 5474 Options Database Keys: 5475 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5476 5477 Level: beginner 5478 5479 .seealso: MatCreateAIJ() 5480 M*/ 5481 5482 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5483 { 5484 Mat_MPIAIJ *b; 5485 PetscErrorCode ierr; 5486 PetscMPIInt size; 5487 5488 PetscFunctionBegin; 5489 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5490 5491 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5492 B->data = (void*)b; 5493 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5494 B->assembled = PETSC_FALSE; 5495 B->insertmode = NOT_SET_VALUES; 5496 b->size = size; 5497 5498 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5499 5500 /* build cache for off array entries formed */ 5501 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5502 5503 b->donotstash = PETSC_FALSE; 5504 b->colmap = 0; 5505 b->garray = 0; 5506 b->roworiented = PETSC_TRUE; 5507 5508 /* stuff used for matrix vector multiply */ 5509 b->lvec = NULL; 5510 b->Mvctx = NULL; 5511 5512 /* stuff for MatGetRow() */ 5513 b->rowindices = 0; 5514 b->rowvalues = 0; 5515 b->getrowactive = PETSC_FALSE; 5516 5517 /* flexible pointer used in CUSP/CUSPARSE classes */ 5518 b->spptr = NULL; 5519 5520 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5521 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5522 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5523 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5524 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5525 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5526 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5527 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5528 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5529 #if defined(PETSC_HAVE_MKL_SPARSE) 5530 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 
5531 #endif 5532 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5533 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5534 #if defined(PETSC_HAVE_ELEMENTAL) 5535 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5536 #endif 5537 #if defined(PETSC_HAVE_HYPRE) 5538 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5539 #endif 5540 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr); 5541 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5542 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5543 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5544 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5545 #if defined(PETSC_HAVE_HYPRE) 5546 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5547 #endif 5548 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5549 PetscFunctionReturn(0); 5550 } 5551 5552 /*@C 5553 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5554 and "off-diagonal" part of the matrix in CSR format. 5555 5556 Collective on MPI_Comm 5557 5558 Input Parameters: 5559 + comm - MPI communicator 5560 . m - number of local rows (Cannot be PETSC_DECIDE) 5561 . n - This value should be the same as the local size used in creating the 5562 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5563 calculated if N is given) For square matrices n is almost always m. 5564 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5565 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5566 . i - row indices for "diagonal" portion of matrix 5567 . j - column indices 5568 . a - matrix values 5569 . oi - row indices for "off-diagonal" portion of matrix 5570 . oj - column indices 5571 - oa - matrix values 5572 5573 Output Parameter: 5574 . mat - the matrix 5575 5576 Level: advanced 5577 5578 Notes: 5579 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5580 must free the arrays once the matrix has been destroyed and not before. 5581 5582 The i and j indices are 0 based 5583 5584 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5585 5586 This sets local rows and cannot be used to set off-processor values. 5587 5588 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5589 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5590 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5591 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5592 keep track of the underlying array. 
Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5593 communication if it is known that only local entries will be set. 5594 5595 .keywords: matrix, aij, compressed row, sparse, parallel 5596 5597 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5598 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5599 @*/ 5600 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5601 { 5602 PetscErrorCode ierr; 5603 Mat_MPIAIJ *maij; 5604 5605 PetscFunctionBegin; 5606 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5607 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5608 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5609 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5610 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5611 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5612 maij = (Mat_MPIAIJ*) (*mat)->data; 5613 5614 (*mat)->preallocated = PETSC_TRUE; 5615 5616 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5617 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5618 5619 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5620 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5621 5622 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5623 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5624 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5625 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5626 5627 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5628 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5629 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5630 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5631 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5632 PetscFunctionReturn(0); 5633 } 5634 5635 /* 5636 Special version for direct calls from Fortran 5637 */ 5638 #include <petsc/private/fortranimpl.h> 5639 5640 /* Change these macros so can be used in void function */ 5641 #undef CHKERRQ 5642 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5643 #undef SETERRQ2 5644 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5645 #undef SETERRQ3 5646 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5647 #undef SETERRQ 5648 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5649 5650 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5651 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5652 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5653 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5654 #else 5655 #endif 5656 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5657 { 5658 Mat mat = *mmat; 5659 PetscInt m = *mm, n = *mn; 5660 InsertMode addv = *maddv; 5661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5662 PetscScalar value; 5663 PetscErrorCode ierr; 5664 5665 MatCheckPreallocated(mat,1); 5666 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5667 5668 #if 
defined(PETSC_USE_DEBUG) 5669 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5670 #endif 5671 { 5672 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5673 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5674 PetscBool roworiented = aij->roworiented; 5675 5676 /* Some Variables required in the macro */ 5677 Mat A = aij->A; 5678 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5679 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5680 MatScalar *aa = a->a; 5681 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5682 Mat B = aij->B; 5683 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5684 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5685 MatScalar *ba = b->a; 5686 5687 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5688 PetscInt nonew = a->nonew; 5689 MatScalar *ap1,*ap2; 5690 5691 PetscFunctionBegin; 5692 for (i=0; i<m; i++) { 5693 if (im[i] < 0) continue; 5694 #if defined(PETSC_USE_DEBUG) 5695 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5696 #endif 5697 if (im[i] >= rstart && im[i] < rend) { 5698 row = im[i] - rstart; 5699 lastcol1 = -1; 5700 rp1 = aj + ai[row]; 5701 ap1 = aa + ai[row]; 5702 rmax1 = aimax[row]; 5703 nrow1 = ailen[row]; 5704 low1 = 0; 5705 high1 = nrow1; 5706 lastcol2 = -1; 5707 rp2 = bj + bi[row]; 5708 ap2 = ba + bi[row]; 5709 rmax2 = bimax[row]; 5710 nrow2 = bilen[row]; 5711 low2 = 0; 5712 high2 = nrow2; 5713 5714 for (j=0; j<n; j++) { 5715 if (roworiented) value = v[i*n+j]; 5716 else value = v[i+j*m]; 5717 if (in[j] >= cstart && in[j] < cend) { 5718 col = in[j] - cstart; 5719 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5720 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5721 } else if (in[j] < 0) continue; 5722 #if defined(PETSC_USE_DEBUG) 5723 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5724 #endif 5725 else { 5726 if (mat->was_assembled) { 5727 if (!aij->colmap) { 5728 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5729 } 5730 #if defined(PETSC_USE_CTABLE) 5731 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5732 col--; 5733 #else 5734 col = aij->colmap[in[j]] - 1; 5735 #endif 5736 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5737 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5738 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5739 col = in[j]; 5740 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5741 B = aij->B; 5742 b = (Mat_SeqAIJ*)B->data; 5743 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5744 rp2 = bj + bi[row]; 5745 ap2 = ba + bi[row]; 5746 rmax2 = bimax[row]; 5747 nrow2 = bilen[row]; 5748 low2 = 0; 5749 high2 = nrow2; 5750 bm = aij->B->rmap->n; 5751 ba = b->a; 5752 } 5753 } else col = in[j]; 5754 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5755 } 5756 } 5757 } else if (!aij->donotstash) { 5758 if (roworiented) { 5759 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5760 } else { 5761 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5762 } 5763 } 5764 } 5765 } 5766 PetscFunctionReturnVoid(); 5767 } 5768 5769
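/*
   Editor's sketch (not part of the original PETSc source): a minimal driver showing the intended
   user-level workflow for the creation routine documented above - MatCreateAIJ() picks MATSEQAIJ or
   MATMPIAIJ from the communicator size and applies the preallocation, after which locally owned rows
   are inserted with MatSetValues() and the matrix is assembled. The tridiagonal stencil, the global
   size N = 128, and the preallocation counts (3 diagonal-block, 2 off-diagonal-block nonzeros per row)
   are purely illustrative assumptions.

   #include <petscmat.h>

   int main(int argc,char **argv)
   {
     Mat            A;
     PetscInt       i,rstart,rend,N = 128;
     PetscErrorCode ierr;

     ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,3,NULL,2,NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       PetscInt    col[3],nc = 0;
       PetscScalar v[3];
       // 1D Laplacian stencil: -1 2 -1, clipped at the domain ends
       if (i > 0)   {col[nc] = i-1; v[nc] = -1.0; nc++;}
       col[nc] = i; v[nc] = 2.0; nc++;
       if (i < N-1) {col[nc] = i+1; v[nc] = -1.0; nc++;}
       ierr = MatSetValues(A,1,&i,nc,col,v,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
     ierr = PetscFinalize();
     return ierr;
   }
*/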