#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL;
   the type also automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr =
ISGetSize(sis,&nsis);CHKERRQ(ierr); 209 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 210 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 211 212 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 213 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 214 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 215 n = ngis + nsis; 216 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 217 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 218 for (i=0; i<n; i++) iis[i] += rstart; 219 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 220 221 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 222 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 223 ierr = ISDestroy(&sis);CHKERRQ(ierr); 224 ierr = ISDestroy(&gis);CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 228 /* 229 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 230 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 231 232 Only for square matrices 233 234 Used by a preconditioner, hence PETSC_EXTERN 235 */ 236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 237 { 238 PetscMPIInt rank,size; 239 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 240 PetscErrorCode ierr; 241 Mat mat; 242 Mat_SeqAIJ *gmata; 243 PetscMPIInt tag; 244 MPI_Status status; 245 PetscBool aij; 246 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 247 248 PetscFunctionBegin; 249 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 250 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 251 if (!rank) { 252 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 253 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 254 } 255 if (reuse == MAT_INITIAL_MATRIX) { 256 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 257 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 258 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 259 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 260 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 261 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 262 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 263 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 264 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 265 266 rowners[0] = 0; 267 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 268 rstart = rowners[rank]; 269 rend = rowners[rank+1]; 270 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 271 if (!rank) { 272 gmata = (Mat_SeqAIJ*) gmat->data; 273 /* send row lengths to all processors */ 274 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 275 for (i=1; i<size; i++) { 276 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 277 } 278 /* determine number diagonal and off-diagonal counts */ 279 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 280 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 281 jj = 0; 282 for (i=0; i<m; i++) { 283 for (j=0; j<dlens[i]; j++) { 284 if (gmata->j[jj] < rstart) ld[i]++; 285 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 286 jj++; 287 } 288 } 289 /* send column indices to other processes */ 290 for (i=1; i<size; i++) { 291 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 292 ierr = 
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 294 } 295 296 /* send numerical values to other processes */ 297 for (i=1; i<size; i++) { 298 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 299 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 300 } 301 gmataa = gmata->a; 302 gmataj = gmata->j; 303 304 } else { 305 /* receive row lengths */ 306 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 307 /* receive column indices */ 308 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 309 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 310 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 311 /* determine number diagonal and off-diagonal counts */ 312 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 313 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 314 jj = 0; 315 for (i=0; i<m; i++) { 316 for (j=0; j<dlens[i]; j++) { 317 if (gmataj[jj] < rstart) ld[i]++; 318 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 319 jj++; 320 } 321 } 322 /* receive numerical values */ 323 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 324 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 325 } 326 /* set preallocation */ 327 for (i=0; i<m; i++) { 328 dlens[i] -= olens[i]; 329 } 330 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 331 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 332 333 for (i=0; i<m; i++) { 334 dlens[i] += olens[i]; 335 } 336 cnt = 0; 337 for (i=0; i<m; i++) { 338 row = rstart + i; 339 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 340 cnt += dlens[i]; 341 } 342 if (rank) { 343 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 344 } 345 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 346 ierr = PetscFree(rowners);CHKERRQ(ierr); 347 348 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 349 350 *inmat = mat; 351 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 352 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 353 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 354 mat = *inmat; 355 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 356 if (!rank) { 357 /* send numerical values to other processes */ 358 gmata = (Mat_SeqAIJ*) gmat->data; 359 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 360 gmataa = gmata->a; 361 for (i=1; i<size; i++) { 362 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 363 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 364 } 365 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 366 } else { 367 /* receive numerical values from process 0*/ 368 nz = Ad->nz + Ao->nz; 369 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 370 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 371 } 372 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 373 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 374 ad = Ad->a; 375 ao = Ao->a; 376 if (mat->rmap->n) { 377 i = 0; 378 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 379 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 380 } 381 for (i=1; i<mat->rmap->n; i++) { 
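        /* Descriptive note (added): each pass copies values from the packed row-by-row stream gmataa.
           The off-block entries trailing row i-1 together with those leading row i go into the
           off-diagonal part B (ld[] holds the number of entries left of the diagonal block in each
           row), then the diagonal-block entries of row i go into A. */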
382 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 383 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 384 } 385 i--; 386 if (mat->rmap->n) { 387 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 388 } 389 if (rank) { 390 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 391 } 392 } 393 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 395 PetscFunctionReturn(0); 396 } 397 398 /* 399 Local utility routine that creates a mapping from the global column 400 number to the local number in the off-diagonal part of the local 401 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 402 a slightly higher hash table cost; without it it is not scalable (each processor 403 has an order N integer array but is fast to acess. 404 */ 405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 406 { 407 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 408 PetscErrorCode ierr; 409 PetscInt n = aij->B->cmap->n,i; 410 411 PetscFunctionBegin; 412 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 413 #if defined(PETSC_USE_CTABLE) 414 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 415 for (i=0; i<n; i++) { 416 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 417 } 418 #else 419 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 420 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 421 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 422 #endif 423 PetscFunctionReturn(0); 424 } 425 426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 427 { \ 428 if (col <= lastcol1) low1 = 0; \ 429 else high1 = nrow1; \ 430 lastcol1 = col;\ 431 while (high1-low1 > 5) { \ 432 t = (low1+high1)/2; \ 433 if (rp1[t] > col) high1 = t; \ 434 else low1 = t; \ 435 } \ 436 for (_i=low1; _i<high1; _i++) { \ 437 if (rp1[_i] > col) break; \ 438 if (rp1[_i] == col) { \ 439 if (addv == ADD_VALUES) ap1[_i] += value; \ 440 else ap1[_i] = value; \ 441 goto a_noinsert; \ 442 } \ 443 } \ 444 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 445 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 446 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 447 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 448 N = nrow1++ - 1; a->nz++; high1++; \ 449 /* shift up all the later entries in this row */ \ 450 for (ii=N; ii>=_i; ii--) { \ 451 rp1[ii+1] = rp1[ii]; \ 452 ap1[ii+1] = ap1[ii]; \ 453 } \ 454 rp1[_i] = col; \ 455 ap1[_i] = value; \ 456 A->nonzerostate++;\ 457 a_noinsert: ; \ 458 ailen[row] = nrow1; \ 459 } 460 461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 462 { \ 463 if (col <= lastcol2) low2 = 0; \ 464 else high2 = nrow2; \ 465 lastcol2 = col; \ 466 while (high2-low2 > 5) { \ 467 t = (low2+high2)/2; \ 468 if (rp2[t] > col) high2 = t; \ 469 else low2 = t; \ 470 } \ 471 for (_i=low2; _i<high2; _i++) { \ 472 if (rp2[_i] > col) break; \ 473 if (rp2[_i] == col) { \ 474 if (addv == ADD_VALUES) ap2[_i] += value; \ 475 else ap2[_i] 
= value; \ 476 goto b_noinsert; \ 477 } \ 478 } \ 479 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 480 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 481 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 482 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 483 N = nrow2++ - 1; b->nz++; high2++; \ 484 /* shift up all the later entries in this row */ \ 485 for (ii=N; ii>=_i; ii--) { \ 486 rp2[ii+1] = rp2[ii]; \ 487 ap2[ii+1] = ap2[ii]; \ 488 } \ 489 rp2[_i] = col; \ 490 ap2[_i] = value; \ 491 B->nonzerostate++; \ 492 b_noinsert: ; \ 493 bilen[row] = nrow2; \ 494 } 495 496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 497 { 498 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 499 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 500 PetscErrorCode ierr; 501 PetscInt l,*garray = mat->garray,diag; 502 503 PetscFunctionBegin; 504 /* code only works for square matrices A */ 505 506 /* find size of row to the left of the diagonal part */ 507 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 508 row = row - diag; 509 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 510 if (garray[b->j[b->i[row]+l]] > diag) break; 511 } 512 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 513 514 /* diagonal part */ 515 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* right of diagonal part */ 518 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 523 { 524 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 525 PetscScalar value; 526 PetscErrorCode ierr; 527 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 528 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 529 PetscBool roworiented = aij->roworiented; 530 531 /* Some Variables required in the macro */ 532 Mat A = aij->A; 533 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 534 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 535 MatScalar *aa = a->a; 536 PetscBool ignorezeroentries = a->ignorezeroentries; 537 Mat B = aij->B; 538 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 539 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 540 MatScalar *ba = b->a; 541 542 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 543 PetscInt nonew; 544 MatScalar *ap1,*ap2; 545 546 PetscFunctionBegin; 547 for (i=0; i<m; i++) { 548 if (im[i] < 0) continue; 549 #if defined(PETSC_USE_DEBUG) 550 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 551 #endif 552 if (im[i] >= rstart && im[i] < rend) { 553 row = im[i] - rstart; 554 lastcol1 = -1; 555 rp1 = aj + ai[row]; 556 ap1 = aa + ai[row]; 557 rmax1 = aimax[row]; 558 nrow1 = ailen[row]; 559 low1 = 0; 560 high1 = nrow1; 561 lastcol2 = -1; 562 rp2 = bj + bi[row]; 563 ap2 = ba + bi[row]; 564 rmax2 = bimax[row]; 565 nrow2 = bilen[row]; 566 low2 = 0; 567 high2 = nrow2; 568 569 for (j=0; j<n; j++) { 570 if (roworiented) value = v[i*n+j]; 571 else value = v[i+j*m]; 
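        /* Descriptive note (added): columns inside [cstart,cend) belong to the local diagonal block A;
           all other columns are translated through colmap/garray and inserted into the off-diagonal
           block B below. */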
572 if (in[j] >= cstart && in[j] < cend) { 573 col = in[j] - cstart; 574 nonew = a->nonew; 575 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 576 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 577 } else if (in[j] < 0) continue; 578 #if defined(PETSC_USE_DEBUG) 579 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 580 #endif 581 else { 582 if (mat->was_assembled) { 583 if (!aij->colmap) { 584 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 585 } 586 #if defined(PETSC_USE_CTABLE) 587 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 588 col--; 589 #else 590 col = aij->colmap[in[j]] - 1; 591 #endif 592 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 593 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 594 col = in[j]; 595 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 596 B = aij->B; 597 b = (Mat_SeqAIJ*)B->data; 598 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 599 rp2 = bj + bi[row]; 600 ap2 = ba + bi[row]; 601 rmax2 = bimax[row]; 602 nrow2 = bilen[row]; 603 low2 = 0; 604 high2 = nrow2; 605 bm = aij->B->rmap->n; 606 ba = b->a; 607 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 608 } else col = in[j]; 609 nonew = b->nonew; 610 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 611 } 612 } 613 } else { 614 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 615 if (!aij->donotstash) { 616 mat->assembled = PETSC_FALSE; 617 if (roworiented) { 618 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 619 } else { 620 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 621 } 622 } 623 } 624 } 625 PetscFunctionReturn(0); 626 } 627 628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 633 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 634 635 PetscFunctionBegin; 636 for (i=0; i<m; i++) { 637 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 638 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 639 if (idxm[i] >= rstart && idxm[i] < rend) { 640 row = idxm[i] - rstart; 641 for (j=0; j<n; j++) { 642 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 643 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 644 if (idxn[j] >= cstart && idxn[j] < cend) { 645 col = idxn[j] - cstart; 646 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 647 } else { 648 if (!aij->colmap) { 649 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 650 } 651 #if defined(PETSC_USE_CTABLE) 652 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 
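          /* Descriptive note (added): colmap stores local column indices shifted by one so that a
             PetscTableFind() result of 0 can mean "column not present"; the decrement below undoes
             the shift, so a missing column becomes -1. */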
653 col--; 654 #else 655 col = aij->colmap[idxn[j]] - 1; 656 #endif 657 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 658 else { 659 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 660 } 661 } 662 } 663 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 664 } 665 PetscFunctionReturn(0); 666 } 667 668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 669 670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 671 { 672 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 673 PetscErrorCode ierr; 674 PetscInt nstash,reallocs; 675 676 PetscFunctionBegin; 677 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 678 679 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 680 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 681 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 682 PetscFunctionReturn(0); 683 } 684 685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 686 { 687 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 688 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 689 PetscErrorCode ierr; 690 PetscMPIInt n; 691 PetscInt i,j,rstart,ncols,flg; 692 PetscInt *row,*col; 693 PetscBool other_disassembled; 694 PetscScalar *val; 695 696 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 697 698 PetscFunctionBegin; 699 if (!aij->donotstash && !mat->nooffprocentries) { 700 while (1) { 701 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 702 if (!flg) break; 703 704 for (i=0; i<n; ) { 705 /* Now identify the consecutive vals belonging to the same row */ 706 for (j=i,rstart=row[j]; j<n; j++) { 707 if (row[j] != rstart) break; 708 } 709 if (j < n) ncols = j-i; 710 else ncols = n-i; 711 /* Now assemble all these values with a single function call */ 712 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 713 714 i = j; 715 } 716 } 717 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 718 } 719 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 720 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 721 722 /* determine if any processor has disassembled, if so we must 723 also disassemble ourselfs, in order that we may reassemble. 
*/ 724 /* 725 if nonzero structure of submatrix B cannot change then we know that 726 no processor disassembled thus we can skip this stuff 727 */ 728 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 729 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 730 if (mat->was_assembled && !other_disassembled) { 731 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 732 } 733 } 734 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 735 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 736 } 737 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 738 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 739 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 740 741 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 742 743 aij->rowvalues = 0; 744 745 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 746 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 747 748 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 749 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 750 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 751 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 752 } 753 PetscFunctionReturn(0); 754 } 755 756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 757 { 758 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 759 PetscErrorCode ierr; 760 761 PetscFunctionBegin; 762 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 763 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 764 PetscFunctionReturn(0); 765 } 766 767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 768 { 769 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 770 PetscInt *lrows; 771 PetscInt r, len; 772 PetscErrorCode ierr; 773 774 PetscFunctionBegin; 775 /* get locally owned rows */ 776 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 777 /* fix right hand side if needed */ 778 if (x && b) { 779 const PetscScalar *xx; 780 PetscScalar *bb; 781 782 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 783 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 784 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 785 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 786 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 787 } 788 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 789 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 790 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 791 PetscBool cong; 792 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 793 if (cong) A->congruentlayouts = 1; 794 else A->congruentlayouts = 0; 795 } 796 if ((diag != 0.0) && A->congruentlayouts) { 797 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 798 } else if (diag != 0.0) { 799 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 800 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 801 for (r = 0; r < len; ++r) { 802 const PetscInt row = lrows[r] + A->rmap->rstart; 803 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 804 } 805 
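    /* Descriptive note (added): the diagonal entries above were inserted with MatSetValues(), so the
       matrix must be assembled again before it can be used. */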
ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 806 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 807 } else { 808 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 809 } 810 ierr = PetscFree(lrows);CHKERRQ(ierr); 811 812 /* only change matrix nonzero state if pattern was allowed to be changed */ 813 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 814 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 815 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 816 } 817 PetscFunctionReturn(0); 818 } 819 820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 821 { 822 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 823 PetscErrorCode ierr; 824 PetscMPIInt n = A->rmap->n; 825 PetscInt i,j,r,m,p = 0,len = 0; 826 PetscInt *lrows,*owners = A->rmap->range; 827 PetscSFNode *rrows; 828 PetscSF sf; 829 const PetscScalar *xx; 830 PetscScalar *bb,*mask; 831 Vec xmask,lmask; 832 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 833 const PetscInt *aj, *ii,*ridx; 834 PetscScalar *aa; 835 836 PetscFunctionBegin; 837 /* Create SF where leaves are input rows and roots are owned rows */ 838 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 839 for (r = 0; r < n; ++r) lrows[r] = -1; 840 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 841 for (r = 0; r < N; ++r) { 842 const PetscInt idx = rows[r]; 843 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 844 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 845 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 846 } 847 rrows[r].rank = p; 848 rrows[r].index = rows[r] - owners[p]; 849 } 850 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 851 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 852 /* Collect flags for rows to be zeroed */ 853 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 854 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 855 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 856 /* Compress and put in row numbers */ 857 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 858 /* zero diagonal part of matrix */ 859 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 860 /* handle off diagonal part of matrix */ 861 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 862 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 863 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 864 for (i=0; i<len; i++) bb[lrows[i]] = 1; 865 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 866 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 867 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 869 if (x) { 870 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 873 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 874 } 875 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 876 /* remove zeroed rows of off diagonal matrix */ 877 ii = aij->i; 878 for (i=0; i<len; i++) { 879 ierr = PetscMemzero(aij->a + 
ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 880 } 881 /* loop over all elements of off process part of matrix zeroing removed columns*/ 882 if (aij->compressedrow.use) { 883 m = aij->compressedrow.nrows; 884 ii = aij->compressedrow.i; 885 ridx = aij->compressedrow.rindex; 886 for (i=0; i<m; i++) { 887 n = ii[i+1] - ii[i]; 888 aj = aij->j + ii[i]; 889 aa = aij->a + ii[i]; 890 891 for (j=0; j<n; j++) { 892 if (PetscAbsScalar(mask[*aj])) { 893 if (b) bb[*ridx] -= *aa*xx[*aj]; 894 *aa = 0.0; 895 } 896 aa++; 897 aj++; 898 } 899 ridx++; 900 } 901 } else { /* do not use compressed row format */ 902 m = l->B->rmap->n; 903 for (i=0; i<m; i++) { 904 n = ii[i+1] - ii[i]; 905 aj = aij->j + ii[i]; 906 aa = aij->a + ii[i]; 907 for (j=0; j<n; j++) { 908 if (PetscAbsScalar(mask[*aj])) { 909 if (b) bb[i] -= *aa*xx[*aj]; 910 *aa = 0.0; 911 } 912 aa++; 913 aj++; 914 } 915 } 916 } 917 if (x) { 918 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 919 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 920 } 921 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 922 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 923 ierr = PetscFree(lrows);CHKERRQ(ierr); 924 925 /* only change matrix nonzero state if pattern was allowed to be changed */ 926 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 927 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 928 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 929 } 930 PetscFunctionReturn(0); 931 } 932 933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 934 { 935 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 936 PetscErrorCode ierr; 937 PetscInt nt; 938 939 PetscFunctionBegin; 940 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 941 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 942 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 943 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 944 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 945 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 946 PetscFunctionReturn(0); 947 } 948 949 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 950 { 951 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 952 PetscErrorCode ierr; 953 954 PetscFunctionBegin; 955 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 956 PetscFunctionReturn(0); 957 } 958 959 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 960 { 961 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 962 PetscErrorCode ierr; 963 964 PetscFunctionBegin; 965 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 966 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 967 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 968 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 973 { 974 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 975 PetscErrorCode ierr; 976 PetscBool merged; 977 978 PetscFunctionBegin; 979 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 980 /* do nondiagonal part */ 981 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 982 if (!merged) { 983 /* send it on its way */ 984 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 985 /* do local 
part */ 986 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 987 /* receive remote parts: note this assumes the values are not actually */ 988 /* added in yy until the next line, */ 989 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 990 } else { 991 /* do local part */ 992 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 993 /* send it on its way */ 994 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 995 /* values actually were received in the Begin() but we need to call this nop */ 996 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 997 } 998 PetscFunctionReturn(0); 999 } 1000 1001 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1002 { 1003 MPI_Comm comm; 1004 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1005 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1006 IS Me,Notme; 1007 PetscErrorCode ierr; 1008 PetscInt M,N,first,last,*notme,i; 1009 PetscMPIInt size; 1010 1011 PetscFunctionBegin; 1012 /* Easy test: symmetric diagonal block */ 1013 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1014 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1015 if (!*f) PetscFunctionReturn(0); 1016 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1017 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1018 if (size == 1) PetscFunctionReturn(0); 1019 1020 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1021 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1022 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1023 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1024 for (i=0; i<first; i++) notme[i] = i; 1025 for (i=last; i<M; i++) notme[i-last+first] = i; 1026 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1027 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1028 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1029 Aoff = Aoffs[0]; 1030 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1031 Boff = Boffs[0]; 1032 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1033 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1034 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1035 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1036 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1037 ierr = PetscFree(notme);CHKERRQ(ierr); 1038 PetscFunctionReturn(0); 1039 } 1040 1041 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1042 { 1043 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1044 PetscErrorCode ierr; 1045 1046 PetscFunctionBegin; 1047 /* do nondiagonal part */ 1048 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1049 /* send it on its way */ 1050 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1051 /* do local part */ 1052 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1053 /* receive remote parts */ 1054 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1055 PetscFunctionReturn(0); 1056 } 1057 1058 /* 1059 This only works correctly for square matrices where the subblock A->A is the 1060 diagonal block 1061 */ 1062 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1063 { 1064 PetscErrorCode ierr; 1065 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1066 1067 PetscFunctionBegin; 1068 if (A->rmap->N != A->cmap->N) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1069 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1070 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1071 PetscFunctionReturn(0); 1072 } 1073 1074 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1075 { 1076 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1077 PetscErrorCode ierr; 1078 1079 PetscFunctionBegin; 1080 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1081 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1082 PetscFunctionReturn(0); 1083 } 1084 1085 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1086 { 1087 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1088 PetscErrorCode ierr; 1089 1090 PetscFunctionBegin; 1091 #if defined(PETSC_USE_LOG) 1092 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1093 #endif 1094 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1095 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1096 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1097 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1098 #if defined(PETSC_USE_CTABLE) 1099 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1100 #else 1101 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1102 #endif 1103 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1104 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1105 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1106 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1107 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1108 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1109 1110 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1111 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1112 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1113 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1114 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1115 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1116 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1117 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1118 #if defined(PETSC_HAVE_ELEMENTAL) 1119 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1120 #endif 1121 #if defined(PETSC_HAVE_HYPRE) 1122 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1123 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1124 #endif 1125 PetscFunctionReturn(0); 1126 } 1127 1128 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1129 { 1130 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1131 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1132 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1133 PetscErrorCode ierr; 1134 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1135 int fd; 1136 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1137 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1138 PetscScalar *column_values; 1139 PetscInt message_count,flowcontrolcount; 1140 FILE *file; 1141 1142 PetscFunctionBegin; 1143 ierr = 
MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1144 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1145 nz = A->nz + B->nz; 1146 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1147 if (!rank) { 1148 header[0] = MAT_FILE_CLASSID; 1149 header[1] = mat->rmap->N; 1150 header[2] = mat->cmap->N; 1151 1152 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1153 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1154 /* get largest number of rows any processor has */ 1155 rlen = mat->rmap->n; 1156 range = mat->rmap->range; 1157 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1158 } else { 1159 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1160 rlen = mat->rmap->n; 1161 } 1162 1163 /* load up the local row counts */ 1164 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1165 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1166 1167 /* store the row lengths to the file */ 1168 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1169 if (!rank) { 1170 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1171 for (i=1; i<size; i++) { 1172 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1173 rlen = range[i+1] - range[i]; 1174 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1175 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1176 } 1177 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1178 } else { 1179 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1180 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1181 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1182 } 1183 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1184 1185 /* load up the local column indices */ 1186 nzmax = nz; /* th processor needs space a largest processor needs */ 1187 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1188 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1189 cnt = 0; 1190 for (i=0; i<mat->rmap->n; i++) { 1191 for (j=B->i[i]; j<B->i[i+1]; j++) { 1192 if ((col = garray[B->j[j]]) > cstart) break; 1193 column_indices[cnt++] = col; 1194 } 1195 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1196 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1197 } 1198 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1199 1200 /* store the column indices to the file */ 1201 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1202 if (!rank) { 1203 MPI_Status status; 1204 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1205 for (i=1; i<size; i++) { 1206 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1207 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1208 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1209 ierr 
= MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1210 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1211 } 1212 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1213 } else { 1214 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1215 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1216 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1217 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1218 } 1219 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1220 1221 /* load up the local column values */ 1222 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1223 cnt = 0; 1224 for (i=0; i<mat->rmap->n; i++) { 1225 for (j=B->i[i]; j<B->i[i+1]; j++) { 1226 if (garray[B->j[j]] > cstart) break; 1227 column_values[cnt++] = B->a[j]; 1228 } 1229 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1230 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1231 } 1232 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1233 1234 /* store the column values to the file */ 1235 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1236 if (!rank) { 1237 MPI_Status status; 1238 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1239 for (i=1; i<size; i++) { 1240 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1241 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1242 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1243 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1244 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1245 } 1246 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1247 } else { 1248 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1249 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1250 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1251 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1252 } 1253 ierr = PetscFree(column_values);CHKERRQ(ierr); 1254 1255 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1256 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1257 PetscFunctionReturn(0); 1258 } 1259 1260 #include <petscdraw.h> 1261 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1262 { 1263 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1264 PetscErrorCode ierr; 1265 PetscMPIInt rank = aij->rank,size = aij->size; 1266 PetscBool isdraw,iascii,isbinary; 1267 PetscViewer sviewer; 1268 PetscViewerFormat format; 1269 1270 PetscFunctionBegin; 1271 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1272 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1273 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1274 if (iascii) { 1275 ierr = 
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1276 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1277 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1278 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1279 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1280 for (i=0; i<(PetscInt)size; i++) { 1281 nmax = PetscMax(nmax,nz[i]); 1282 nmin = PetscMin(nmin,nz[i]); 1283 navg += nz[i]; 1284 } 1285 ierr = PetscFree(nz);CHKERRQ(ierr); 1286 navg = navg/size; 1287 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1288 PetscFunctionReturn(0); 1289 } 1290 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1291 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1292 MatInfo info; 1293 PetscBool inodes; 1294 1295 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1296 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1297 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1298 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1299 if (!inodes) { 1300 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1301 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1302 } else { 1303 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1304 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1305 } 1306 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1307 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1308 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1309 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1310 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1311 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1312 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1313 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1316 PetscInt inodecount,inodelimit,*inodes; 1317 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1318 if (inodes) { 1319 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1320 } else { 1321 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1322 } 1323 PetscFunctionReturn(0); 1324 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1325 PetscFunctionReturn(0); 1326 } 1327 } else if (isbinary) { 1328 if (size == 1) { 1329 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1330 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1331 } else { 1332 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1333 } 1334 PetscFunctionReturn(0); 1335 } else if (isdraw) { 1336 PetscDraw draw; 1337 PetscBool isnull; 1338 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1339 ierr = 
PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1340 if (isnull) PetscFunctionReturn(0); 1341 } 1342 1343 { 1344 /* assemble the entire matrix onto first processor. */ 1345 Mat A; 1346 Mat_SeqAIJ *Aloc; 1347 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1348 MatScalar *a; 1349 1350 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1351 if (!rank) { 1352 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1353 } else { 1354 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1355 } 1356 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1357 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1358 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1359 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1360 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1361 1362 /* copy over the A part */ 1363 Aloc = (Mat_SeqAIJ*)aij->A->data; 1364 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1365 row = mat->rmap->rstart; 1366 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1367 for (i=0; i<m; i++) { 1368 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1369 row++; 1370 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1371 } 1372 aj = Aloc->j; 1373 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1374 1375 /* copy over the B part */ 1376 Aloc = (Mat_SeqAIJ*)aij->B->data; 1377 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1378 row = mat->rmap->rstart; 1379 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1380 ct = cols; 1381 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1382 for (i=0; i<m; i++) { 1383 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1384 row++; 1385 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1386 } 1387 ierr = PetscFree(ct);CHKERRQ(ierr); 1388 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1389 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1390 /* 1391 Everyone has to call to draw the matrix since the graphics waits are 1392 synchronized across all processors that share the PetscDraw object 1393 */ 1394 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1395 if (!rank) { 1396 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1397 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1398 } 1399 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1400 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1401 ierr = MatDestroy(&A);CHKERRQ(ierr); 1402 } 1403 PetscFunctionReturn(0); 1404 } 1405 1406 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1407 { 1408 PetscErrorCode ierr; 1409 PetscBool iascii,isdraw,issocket,isbinary; 1410 1411 PetscFunctionBegin; 1412 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1413 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1414 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1415 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1416 if (iascii || isdraw || isbinary || issocket) { 1417 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1418 } 1419 PetscFunctionReturn(0); 1420 } 1421 1422 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal 
fshift,PetscInt its,PetscInt lits,Vec xx) 1423 { 1424 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1425 PetscErrorCode ierr; 1426 Vec bb1 = 0; 1427 PetscBool hasop; 1428 1429 PetscFunctionBegin; 1430 if (flag == SOR_APPLY_UPPER) { 1431 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1432 PetscFunctionReturn(0); 1433 } 1434 1435 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1436 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1437 } 1438 1439 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1440 if (flag & SOR_ZERO_INITIAL_GUESS) { 1441 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1442 its--; 1443 } 1444 1445 while (its--) { 1446 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1447 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1448 1449 /* update rhs: bb1 = bb - B*x */ 1450 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1451 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1452 1453 /* local sweep */ 1454 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1455 } 1456 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1457 if (flag & SOR_ZERO_INITIAL_GUESS) { 1458 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1459 its--; 1460 } 1461 while (its--) { 1462 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1463 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1464 1465 /* update rhs: bb1 = bb - B*x */ 1466 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1467 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1468 1469 /* local sweep */ 1470 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1471 } 1472 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1473 if (flag & SOR_ZERO_INITIAL_GUESS) { 1474 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1475 its--; 1476 } 1477 while (its--) { 1478 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1479 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1480 1481 /* update rhs: bb1 = bb - B*x */ 1482 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1483 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1484 1485 /* local sweep */ 1486 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1487 } 1488 } else if (flag & SOR_EISENSTAT) { 1489 Vec xx1; 1490 1491 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1492 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1493 1494 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1495 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1496 if (!mat->diag) { 1497 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1498 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1499 } 1500 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1501 if (hasop) { 1502 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1503 } else { 1504 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1505 } 1506 ierr = 
VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1507 1508 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1509 1510 /* local sweep */ 1511 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1512 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1513 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1514 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1515 1516 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1517 1518 matin->factorerrortype = mat->A->factorerrortype; 1519 PetscFunctionReturn(0); 1520 } 1521 1522 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1523 { 1524 Mat aA,aB,Aperm; 1525 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1526 PetscScalar *aa,*ba; 1527 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1528 PetscSF rowsf,sf; 1529 IS parcolp = NULL; 1530 PetscBool done; 1531 PetscErrorCode ierr; 1532 1533 PetscFunctionBegin; 1534 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1535 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1536 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1537 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1538 1539 /* Invert row permutation to find out where my rows should go */ 1540 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1541 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1542 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1543 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1544 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1545 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1546 1547 /* Invert column permutation to find out where my columns should go */ 1548 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1549 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1550 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1551 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1552 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1553 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1554 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1555 1556 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1557 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1558 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1559 1560 /* Find out where my gcols should go */ 1561 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1562 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1563 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1564 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1565 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1566 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1567 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1568 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1569 1570 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1571 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1572 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1573 for (i=0; i<m; i++) { 1574 PetscInt row = rdest[i],rowner; 1575 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1576 for (j=ai[i]; j<ai[i+1]; 
j++) { 1577 PetscInt cowner,col = cdest[aj[j]]; 1578 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1579 if (rowner == cowner) dnnz[i]++; 1580 else onnz[i]++; 1581 } 1582 for (j=bi[i]; j<bi[i+1]; j++) { 1583 PetscInt cowner,col = gcdest[bj[j]]; 1584 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1585 if (rowner == cowner) dnnz[i]++; 1586 else onnz[i]++; 1587 } 1588 } 1589 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1590 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1591 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1592 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1593 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1594 1595 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1596 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1597 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1598 for (i=0; i<m; i++) { 1599 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1600 PetscInt j0,rowlen; 1601 rowlen = ai[i+1] - ai[i]; 1602 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1603 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1604 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1605 } 1606 rowlen = bi[i+1] - bi[i]; 1607 for (j0=j=0; j<rowlen; j0=j) { 1608 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1609 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1610 } 1611 } 1612 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1613 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1614 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1615 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1616 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1617 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1618 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1619 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1620 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1621 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1622 *B = Aperm; 1623 PetscFunctionReturn(0); 1624 } 1625 1626 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1627 { 1628 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1629 PetscErrorCode ierr; 1630 1631 PetscFunctionBegin; 1632 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1633 if (ghosts) *ghosts = aij->garray; 1634 PetscFunctionReturn(0); 1635 } 1636 1637 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1638 { 1639 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1640 Mat A = mat->A,B = mat->B; 1641 PetscErrorCode ierr; 1642 PetscReal isend[5],irecv[5]; 1643 1644 PetscFunctionBegin; 1645 info->block_size = 1.0; 1646 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1647 1648 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1649 isend[3] = info->memory; isend[4] = info->mallocs; 1650 1651 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1652 1653 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1654 isend[3] += info->memory; isend[4] += info->mallocs; 1655 if (flag == 
MAT_LOCAL) { 1656 info->nz_used = isend[0]; 1657 info->nz_allocated = isend[1]; 1658 info->nz_unneeded = isend[2]; 1659 info->memory = isend[3]; 1660 info->mallocs = isend[4]; 1661 } else if (flag == MAT_GLOBAL_MAX) { 1662 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1663 1664 info->nz_used = irecv[0]; 1665 info->nz_allocated = irecv[1]; 1666 info->nz_unneeded = irecv[2]; 1667 info->memory = irecv[3]; 1668 info->mallocs = irecv[4]; 1669 } else if (flag == MAT_GLOBAL_SUM) { 1670 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1671 1672 info->nz_used = irecv[0]; 1673 info->nz_allocated = irecv[1]; 1674 info->nz_unneeded = irecv[2]; 1675 info->memory = irecv[3]; 1676 info->mallocs = irecv[4]; 1677 } 1678 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1679 info->fill_ratio_needed = 0; 1680 info->factor_mallocs = 0; 1681 PetscFunctionReturn(0); 1682 } 1683 1684 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1685 { 1686 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1687 PetscErrorCode ierr; 1688 1689 PetscFunctionBegin; 1690 switch (op) { 1691 case MAT_NEW_NONZERO_LOCATIONS: 1692 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1693 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1694 case MAT_KEEP_NONZERO_PATTERN: 1695 case MAT_NEW_NONZERO_LOCATION_ERR: 1696 case MAT_USE_INODES: 1697 case MAT_IGNORE_ZERO_ENTRIES: 1698 MatCheckPreallocated(A,1); 1699 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1700 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1701 break; 1702 case MAT_ROW_ORIENTED: 1703 MatCheckPreallocated(A,1); 1704 a->roworiented = flg; 1705 1706 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1707 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1708 break; 1709 case MAT_NEW_DIAGONALS: 1710 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1711 break; 1712 case MAT_IGNORE_OFF_PROC_ENTRIES: 1713 a->donotstash = flg; 1714 break; 1715 case MAT_SPD: 1716 A->spd_set = PETSC_TRUE; 1717 A->spd = flg; 1718 if (flg) { 1719 A->symmetric = PETSC_TRUE; 1720 A->structurally_symmetric = PETSC_TRUE; 1721 A->symmetric_set = PETSC_TRUE; 1722 A->structurally_symmetric_set = PETSC_TRUE; 1723 } 1724 break; 1725 case MAT_SYMMETRIC: 1726 MatCheckPreallocated(A,1); 1727 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1728 break; 1729 case MAT_STRUCTURALLY_SYMMETRIC: 1730 MatCheckPreallocated(A,1); 1731 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1732 break; 1733 case MAT_HERMITIAN: 1734 MatCheckPreallocated(A,1); 1735 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1736 break; 1737 case MAT_SYMMETRY_ETERNAL: 1738 MatCheckPreallocated(A,1); 1739 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1740 break; 1741 case MAT_SUBMAT_SINGLEIS: 1742 A->submat_singleis = flg; 1743 break; 1744 case MAT_STRUCTURE_ONLY: 1745 /* The option is handled directly by MatSetOption() */ 1746 break; 1747 default: 1748 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1749 } 1750 PetscFunctionReturn(0); 1751 } 1752 1753 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1754 { 1755 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1756 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1757 PetscErrorCode ierr; 1758 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1759 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1760 PetscInt *cmap,*idx_p; 1761 1762 PetscFunctionBegin; 
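  /*
     How the pieces fit together: an MPIAIJ matrix keeps its local rows in two SeqAIJ blocks,
     the diagonal block A (columns owned by this process) and the off-diagonal block B (all other
     columns, stored compressed and translated to global numbering through garray).  Getting a row
     therefore fetches it from both blocks and merges the results into one list ordered by global
     column.  Illustrative (hypothetical) numbers: with cstart = 10, an A row with local columns
     {0,3} and a B row whose garray-mapped columns are {2,57} merge to {2, 10, 13, 57}.
  */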
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr  = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
    }
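      /*
         What the reduction above computes: each process forms the partial sum of |a_ij|^2 over its
         A and B blocks, and the Frobenius norm is

             ||M||_F = sqrt( sum_over_processes ( sum_{A_loc} |a_ij|^2 + sum_{B_loc} |b_ij|^2 ) )

         so one MPIU_Allreduce() followed by a single square root finishes the job.
      */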
else if (type == NORM_1) { /* max column norm */ 1867 PetscReal *tmp,*tmp2; 1868 PetscInt *jj,*garray = aij->garray; 1869 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1870 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1871 *norm = 0.0; 1872 v = amat->a; jj = amat->j; 1873 for (j=0; j<amat->nz; j++) { 1874 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1875 } 1876 v = bmat->a; jj = bmat->j; 1877 for (j=0; j<bmat->nz; j++) { 1878 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1879 } 1880 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1881 for (j=0; j<mat->cmap->N; j++) { 1882 if (tmp2[j] > *norm) *norm = tmp2[j]; 1883 } 1884 ierr = PetscFree(tmp);CHKERRQ(ierr); 1885 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1886 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1887 } else if (type == NORM_INFINITY) { /* max row norm */ 1888 PetscReal ntemp = 0.0; 1889 for (j=0; j<aij->A->rmap->n; j++) { 1890 v = amat->a + amat->i[j]; 1891 sum = 0.0; 1892 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1893 sum += PetscAbsScalar(*v); v++; 1894 } 1895 v = bmat->a + bmat->i[j]; 1896 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1897 sum += PetscAbsScalar(*v); v++; 1898 } 1899 if (sum > ntemp) ntemp = sum; 1900 } 1901 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1902 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1903 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1904 } 1905 PetscFunctionReturn(0); 1906 } 1907 1908 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1909 { 1910 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1911 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1912 PetscErrorCode ierr; 1913 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1914 PetscInt cstart = A->cmap->rstart,ncol; 1915 Mat B; 1916 MatScalar *array; 1917 1918 PetscFunctionBegin; 1919 if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1920 1921 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1922 ai = Aloc->i; aj = Aloc->j; 1923 bi = Bloc->i; bj = Bloc->j; 1924 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1925 PetscInt *d_nnz,*g_nnz,*o_nnz; 1926 PetscSFNode *oloc; 1927 PETSC_UNUSED PetscSF sf; 1928 1929 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1930 /* compute d_nnz for preallocation */ 1931 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1932 for (i=0; i<ai[ma]; i++) { 1933 d_nnz[aj[i]]++; 1934 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1935 } 1936 /* compute local off-diagonal contributions */ 1937 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1938 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1939 /* map those to global */ 1940 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1941 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1942 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1943 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1944 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1945 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1946 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1947 1948 ierr = 
MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1949 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1950 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1951 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1952 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1953 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1954 } else { 1955 B = *matout; 1956 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1957 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1958 } 1959 1960 /* copy over the A part */ 1961 array = Aloc->a; 1962 row = A->rmap->rstart; 1963 for (i=0; i<ma; i++) { 1964 ncol = ai[i+1]-ai[i]; 1965 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1966 row++; 1967 array += ncol; aj += ncol; 1968 } 1969 aj = Aloc->j; 1970 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1971 1972 /* copy over the B part */ 1973 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1974 array = Bloc->a; 1975 row = A->rmap->rstart; 1976 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1977 cols_tmp = cols; 1978 for (i=0; i<mb; i++) { 1979 ncol = bi[i+1]-bi[i]; 1980 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1981 row++; 1982 array += ncol; cols_tmp += ncol; 1983 } 1984 ierr = PetscFree(cols);CHKERRQ(ierr); 1985 1986 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1987 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1988 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1989 *matout = B; 1990 } else { 1991 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1992 } 1993 PetscFunctionReturn(0); 1994 } 1995 1996 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1997 { 1998 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1999 Mat a = aij->A,b = aij->B; 2000 PetscErrorCode ierr; 2001 PetscInt s1,s2,s3; 2002 2003 PetscFunctionBegin; 2004 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2005 if (rr) { 2006 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2007 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2008 /* Overlap communication with computation. 
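         The scatter of rr into aij->lvec is only started here; the diagonal block is scaled while
         the messages are in flight, and VecScatterEnd() is called further below, right before the
         off-diagonal block actually needs the ghosted values.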
   */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
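   Illustrative example: if row i of X has global columns {1,4,7} and row i of Y has global
   columns {1,5}, the merged pattern is {1,4,5,7}, so nnz[i] = 4.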
2091 */ 2092 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2093 { 2094 PetscInt i,j,k,nzx,nzy; 2095 2096 PetscFunctionBegin; 2097 /* Set the number of nonzeros in the new matrix */ 2098 for (i=0; i<m; i++) { 2099 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2100 nzx = xi[i+1] - xi[i]; 2101 nzy = yi[i+1] - yi[i]; 2102 nnz[i] = 0; 2103 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2104 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2105 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2106 nnz[i]++; 2107 } 2108 for (; k<nzy; k++) nnz[i]++; 2109 } 2110 PetscFunctionReturn(0); 2111 } 2112 2113 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2114 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2115 { 2116 PetscErrorCode ierr; 2117 PetscInt m = Y->rmap->N; 2118 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2119 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2120 2121 PetscFunctionBegin; 2122 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2123 PetscFunctionReturn(0); 2124 } 2125 2126 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2127 { 2128 PetscErrorCode ierr; 2129 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2130 PetscBLASInt bnz,one=1; 2131 Mat_SeqAIJ *x,*y; 2132 2133 PetscFunctionBegin; 2134 if (str == SAME_NONZERO_PATTERN) { 2135 PetscScalar alpha = a; 2136 x = (Mat_SeqAIJ*)xx->A->data; 2137 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2138 y = (Mat_SeqAIJ*)yy->A->data; 2139 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2140 x = (Mat_SeqAIJ*)xx->B->data; 2141 y = (Mat_SeqAIJ*)yy->B->data; 2142 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2143 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2144 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2145 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2146 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2147 } else { 2148 Mat B; 2149 PetscInt *nnz_d,*nnz_o; 2150 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2151 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2152 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2153 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2154 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2155 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2156 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2157 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2158 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2159 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2160 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2161 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2162 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2163 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2164 } 2165 PetscFunctionReturn(0); 2166 } 2167 2168 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2169 2170 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2171 { 2172 #if defined(PETSC_USE_COMPLEX) 2173 PetscErrorCode ierr; 2174 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2175 2176 PetscFunctionBegin; 2177 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2178 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2179 #else 2180 PetscFunctionBegin; 2181 #endif 2182 PetscFunctionReturn(0); 2183 } 2184 2185 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2186 { 2187 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2188 PetscErrorCode ierr; 2189 2190 PetscFunctionBegin; 2191 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2192 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2193 PetscFunctionReturn(0); 2194 } 2195 2196 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2197 { 2198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2199 PetscErrorCode ierr; 2200 2201 PetscFunctionBegin; 2202 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2203 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2204 PetscFunctionReturn(0); 2205 } 2206 2207 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2208 { 2209 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2210 PetscErrorCode ierr; 2211 PetscInt i,*idxb = 0; 2212 PetscScalar *va,*vb; 2213 Vec vtmp; 2214 2215 PetscFunctionBegin; 2216 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2217 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2218 if (idx) { 2219 for (i=0; i<A->rmap->n; i++) { 2220 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2221 } 2222 } 2223 2224 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2225 if (idx) { 2226 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2227 } 2228 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2229 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2230 2231 for (i=0; i<A->rmap->n; i++) { 2232 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2233 va[i] = vb[i]; 2234 if (idx) idx[i] = a->garray[idxb[i]]; 2235 } 2236 } 2237 2238 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2239 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2240 ierr = PetscFree(idxb);CHKERRQ(ierr); 2241 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2242 PetscFunctionReturn(0); 2243 } 2244 2245 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2246 { 2247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2248 PetscErrorCode ierr; 2249 PetscInt i,*idxb = 0; 2250 PetscScalar *va,*vb; 2251 Vec vtmp; 2252 2253 PetscFunctionBegin; 2254 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2255 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2256 if (idx) { 2257 for (i=0; i<A->cmap->n; i++) { 2258 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2259 } 2260 } 2261 2262 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2263 if (idx) { 2264 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2265 } 2266 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2267 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2268 2269 for (i=0; i<A->rmap->n; i++) { 2270 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2271 va[i] = vb[i]; 2272 if (idx) idx[i] = a->garray[idxb[i]]; 2273 } 2274 } 2275 2276 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2277 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2278 ierr = PetscFree(idxb);CHKERRQ(ierr); 2279 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2280 PetscFunctionReturn(0); 2281 } 2282 2283 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2284 { 2285 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2286 PetscInt n = A->rmap->n; 2287 PetscInt cstart = A->cmap->rstart; 2288 PetscInt *cmap = mat->garray; 2289 PetscInt *diagIdx, *offdiagIdx; 2290 Vec diagV, offdiagV; 2291 PetscScalar *a, *diagA, *offdiagA; 2292 PetscInt r; 2293 PetscErrorCode ierr; 2294 2295 PetscFunctionBegin; 2296 
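  /*
     Strategy (sketch): MatGetRowMin() is applied separately to the diagonal and off-diagonal
     blocks, giving one candidate value and local column per row from each; the loop below keeps
     whichever candidate has the smaller magnitude, translating off-diagonal indices to global
     numbering through garray and diagonal ones by adding cstart.
  */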
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2297 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2298 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2299 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2300 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2301 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2302 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2303 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2304 for (r = 0; r < n; ++r) { 2305 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2306 a[r] = diagA[r]; 2307 idx[r] = cstart + diagIdx[r]; 2308 } else { 2309 a[r] = offdiagA[r]; 2310 idx[r] = cmap[offdiagIdx[r]]; 2311 } 2312 } 2313 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2314 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2315 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2316 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2317 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2318 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2323 { 2324 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2325 PetscInt n = A->rmap->n; 2326 PetscInt cstart = A->cmap->rstart; 2327 PetscInt *cmap = mat->garray; 2328 PetscInt *diagIdx, *offdiagIdx; 2329 Vec diagV, offdiagV; 2330 PetscScalar *a, *diagA, *offdiagA; 2331 PetscInt r; 2332 PetscErrorCode ierr; 2333 2334 PetscFunctionBegin; 2335 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2336 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2337 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2338 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2339 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2340 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2341 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2342 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2343 for (r = 0; r < n; ++r) { 2344 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2345 a[r] = diagA[r]; 2346 idx[r] = cstart + diagIdx[r]; 2347 } else { 2348 a[r] = offdiagA[r]; 2349 idx[r] = cmap[offdiagIdx[r]]; 2350 } 2351 } 2352 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2353 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2354 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2355 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2356 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2357 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2358 PetscFunctionReturn(0); 2359 } 2360 2361 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2362 { 2363 PetscErrorCode ierr; 2364 Mat *dummy; 2365 2366 PetscFunctionBegin; 2367 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2368 *newmat = *dummy; 2369 ierr = PetscFree(dummy);CHKERRQ(ierr); 2370 PetscFunctionReturn(0); 2371 } 2372 2373 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2374 { 2375 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2376 PetscErrorCode ierr; 2377 2378 PetscFunctionBegin; 2379 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2380 A->factorerrortype = a->A->factorerrortype; 2381 PetscFunctionReturn(0); 2382 } 2383 2384 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2385 { 2386 PetscErrorCode ierr; 2387 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2388 2389 PetscFunctionBegin; 2390 
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2391 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2392 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2393 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2394 PetscFunctionReturn(0); 2395 } 2396 2397 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2398 { 2399 PetscFunctionBegin; 2400 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2401 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2402 PetscFunctionReturn(0); 2403 } 2404 2405 /*@ 2406 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2407 2408 Collective on Mat 2409 2410 Input Parameters: 2411 + A - the matrix 2412 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2413 2414 Level: advanced 2415 2416 @*/ 2417 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2418 { 2419 PetscErrorCode ierr; 2420 2421 PetscFunctionBegin; 2422 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2423 PetscFunctionReturn(0); 2424 } 2425 2426 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2427 { 2428 PetscErrorCode ierr; 2429 PetscBool sc = PETSC_FALSE,flg; 2430 2431 PetscFunctionBegin; 2432 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2433 ierr = PetscObjectOptionsBegin((PetscObject)A); 2434 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2435 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2436 if (flg) { 2437 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2438 } 2439 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2444 { 2445 PetscErrorCode ierr; 2446 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2447 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2448 2449 PetscFunctionBegin; 2450 if (!Y->preallocated) { 2451 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2452 } else if (!aij->nz) { 2453 PetscInt nonew = aij->nonew; 2454 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2455 aij->nonew = nonew; 2456 } 2457 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2458 PetscFunctionReturn(0); 2459 } 2460 2461 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2462 { 2463 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2464 PetscErrorCode ierr; 2465 2466 PetscFunctionBegin; 2467 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2468 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2469 if (d) { 2470 PetscInt rstart; 2471 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2472 *d += rstart; 2473 2474 } 2475 PetscFunctionReturn(0); 2476 } 2477 2478 2479 /* -------------------------------------------------------------------*/ 2480 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2481 MatGetRow_MPIAIJ, 2482 MatRestoreRow_MPIAIJ, 2483 MatMult_MPIAIJ, 2484 /* 4*/ MatMultAdd_MPIAIJ, 2485 MatMultTranspose_MPIAIJ, 2486 MatMultTransposeAdd_MPIAIJ, 2487 0, 2488 0, 2489 0, 2490 /*10*/ 0, 2491 0, 2492 0, 2493 MatSOR_MPIAIJ, 2494 MatTranspose_MPIAIJ, 2495 /*15*/ MatGetInfo_MPIAIJ, 2496 MatEqual_MPIAIJ, 2497 
        MatGetDiagonal_MPIAIJ,
        MatDiagonalScale_MPIAIJ,
        MatNorm_MPIAIJ,
 /*20*/ MatAssemblyBegin_MPIAIJ,
        MatAssemblyEnd_MPIAIJ,
        MatSetOption_MPIAIJ,
        MatZeroEntries_MPIAIJ,
 /*24*/ MatZeroRows_MPIAIJ,
        0,
        0,
        0,
        0,
 /*29*/ MatSetUp_MPIAIJ,
        0,
        0,
        MatGetDiagonalBlock_MPIAIJ,
        0,
 /*34*/ MatDuplicate_MPIAIJ,
        0,
        0,
        0,
        0,
 /*39*/ MatAXPY_MPIAIJ,
        MatCreateSubMatrices_MPIAIJ,
        MatIncreaseOverlap_MPIAIJ,
        MatGetValues_MPIAIJ,
        MatCopy_MPIAIJ,
 /*44*/ MatGetRowMax_MPIAIJ,
        MatScale_MPIAIJ,
        MatShift_MPIAIJ,
        MatDiagonalSet_MPIAIJ,
        MatZeroRowsColumns_MPIAIJ,
 /*49*/ MatSetRandom_MPIAIJ,
        0,
        0,
        0,
        0,
 /*54*/ MatFDColoringCreate_MPIXAIJ,
        0,
        MatSetUnfactored_MPIAIJ,
        MatPermute_MPIAIJ,
        0,
 /*59*/ MatCreateSubMatrix_MPIAIJ,
        MatDestroy_MPIAIJ,
        MatView_MPIAIJ,
        0,
        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
        0,
        0,
        0,
 /*69*/ MatGetRowMaxAbs_MPIAIJ,
        MatGetRowMinAbs_MPIAIJ,
        0,
        0,
        0,
        0,
 /*75*/ MatFDColoringApply_AIJ,
        MatSetFromOptions_MPIAIJ,
        0,
        0,
        MatFindZeroDiagonals_MPIAIJ,
 /*80*/ 0,
        0,
        0,
 /*83*/ MatLoad_MPIAIJ,
        0,
        0,
        0,
        0,
        0,
 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
        MatMatMultNumeric_MPIAIJ_MPIAIJ,
        MatPtAP_MPIAIJ_MPIAIJ,
        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
        0,
        0,
        0,
        0,
 /*99*/ 0,
        0,
        0,
        MatConjugate_MPIAIJ,
        0,
/*104*/ MatSetValuesRow_MPIAIJ,
        MatRealPart_MPIAIJ,
        MatImaginaryPart_MPIAIJ,
        0,
        0,
/*109*/ 0,
        0,
        MatGetRowMin_MPIAIJ,
        0,
        MatMissingDiagonal_MPIAIJ,
/*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
        0,
        MatGetGhosts_MPIAIJ,
        0,
        0,
/*119*/ 0,
        0,
        0,
        0,
        MatGetMultiProcBlock_MPIAIJ,
/*124*/ MatFindNonzeroRows_MPIAIJ,
        MatGetColumnNorms_MPIAIJ,
        MatInvertBlockDiagonal_MPIAIJ,
        0,
        MatCreateSubMatricesMPI_MPIAIJ,
/*129*/ 0,
        MatTransposeMatMult_MPIAIJ_MPIAIJ,
        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
        0,
/*134*/ 0,
        0,
        MatRARt_MPIAIJ_MPIAIJ,
        0,
        0,
/*139*/ MatSetBlockSizes_MPIAIJ,
        0,
        0,
        MatFDColoringSetUp_MPIXAIJ,
        MatFindOffBlockDiagonalEntries_MPIAIJ,
/*144*/ MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
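  /*
     Typical caller-side usage (illustrative sketch; m and n stand for the local row and column sizes):

       Mat A;
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

     reserves room for at most 5 nonzeros per row in the diagonal block and 2 per row in the
     off-diagonal block; when the d_nnz/o_nnz arrays are supplied instead, the scalar d_nz/o_nz
     values are ignored.
  */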
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because b->B will have been resized we simply destroy it and create a new one each time */
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
    ierr =
PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2739 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2740 } else a->garray = 0; 2741 2742 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2743 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2744 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2745 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2746 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2747 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2748 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2749 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2750 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2751 *newmat = mat; 2752 PetscFunctionReturn(0); 2753 } 2754 2755 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2756 { 2757 PetscScalar *vals,*svals; 2758 MPI_Comm comm; 2759 PetscErrorCode ierr; 2760 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2761 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2762 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2763 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2764 PetscInt cend,cstart,n,*rowners; 2765 int fd; 2766 PetscInt bs = newMat->rmap->bs; 2767 2768 PetscFunctionBegin; 2769 /* force binary viewer to load .info file if it has not yet done so */ 2770 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2771 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2772 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2773 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2774 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2775 if (!rank) { 2776 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2777 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2778 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2779 } 2780 2781 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2782 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2783 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2784 if (bs < 0) bs = 1; 2785 2786 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2787 M = header[1]; N = header[2]; 2788 2789 /* If global sizes are set, check if they are consistent with that given in the file */ 2790 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2791 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2792 2793 /* determine ownership of all (block) rows */ 2794 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2795 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2796 else m = newMat->rmap->n; /* Set by user */ 2797 2798 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2799 
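  /*
     rowners[] becomes a prefix sum below: after the Allgather of the local row counts into
     rowners[1..size] and the running sum, rowners[rank] .. rowners[rank+1] is the global row
     range owned by each process.  Illustrative numbers: local sizes {3,2,4} give rowners = {0,3,5,9}.
  */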
ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2800 2801 /* First process needs enough room for process with most rows */ 2802 if (!rank) { 2803 mmax = rowners[1]; 2804 for (i=2; i<=size; i++) { 2805 mmax = PetscMax(mmax, rowners[i]); 2806 } 2807 } else mmax = -1; /* unused, but compilers complain */ 2808 2809 rowners[0] = 0; 2810 for (i=2; i<=size; i++) { 2811 rowners[i] += rowners[i-1]; 2812 } 2813 rstart = rowners[rank]; 2814 rend = rowners[rank+1]; 2815 2816 /* distribute row lengths to all processors */ 2817 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2818 if (!rank) { 2819 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2820 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2821 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2822 for (j=0; j<m; j++) { 2823 procsnz[0] += ourlens[j]; 2824 } 2825 for (i=1; i<size; i++) { 2826 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2827 /* calculate the number of nonzeros on each processor */ 2828 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2829 procsnz[i] += rowlengths[j]; 2830 } 2831 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2832 } 2833 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2834 } else { 2835 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2836 } 2837 2838 if (!rank) { 2839 /* determine max buffer needed and allocate it */ 2840 maxnz = 0; 2841 for (i=0; i<size; i++) { 2842 maxnz = PetscMax(maxnz,procsnz[i]); 2843 } 2844 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2845 2846 /* read in my part of the matrix column indices */ 2847 nz = procsnz[0]; 2848 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2849 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2850 2851 /* read in every one elses and ship off */ 2852 for (i=1; i<size; i++) { 2853 nz = procsnz[i]; 2854 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2855 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2856 } 2857 ierr = PetscFree(cols);CHKERRQ(ierr); 2858 } else { 2859 /* determine buffer space needed for message */ 2860 nz = 0; 2861 for (i=0; i<m; i++) { 2862 nz += ourlens[i]; 2863 } 2864 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2865 2866 /* receive message of column indices*/ 2867 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2868 } 2869 2870 /* determine column ownership if matrix is not square */ 2871 if (N != M) { 2872 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2873 else n = newMat->cmap->n; 2874 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2875 cstart = cend - n; 2876 } else { 2877 cstart = rstart; 2878 cend = rend; 2879 n = cend - cstart; 2880 } 2881 2882 /* loop over local rows, determining number of off diagonal entries */ 2883 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2884 jj = 0; 2885 for (i=0; i<m; i++) { 2886 for (j=0; j<ourlens[i]; j++) { 2887 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2888 jj++; 2889 } 2890 } 2891 2892 for (i=0; i<m; i++) { 2893 ourlens[i] -= offlens[i]; 2894 } 2895 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2896 2897 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2898 2899 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2900 2901 for (i=0; i<m; i++) { 2902 ourlens[i] += offlens[i]; 2903 } 2904 2905 if (!rank) { 2906 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2907 2908 /* read in 
my part of the matrix numerical values */ 2909 nz = procsnz[0]; 2910 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2911 2912 /* insert into matrix */ 2913 jj = rstart; 2914 smycols = mycols; 2915 svals = vals; 2916 for (i=0; i<m; i++) { 2917 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2918 smycols += ourlens[i]; 2919 svals += ourlens[i]; 2920 jj++; 2921 } 2922 2923 /* read in other processors and ship out */ 2924 for (i=1; i<size; i++) { 2925 nz = procsnz[i]; 2926 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2927 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2928 } 2929 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2930 } else { 2931 /* receive numeric values */ 2932 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 2933 2934 /* receive message of values*/ 2935 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2936 2937 /* insert into matrix */ 2938 jj = rstart; 2939 smycols = mycols; 2940 svals = vals; 2941 for (i=0; i<m; i++) { 2942 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2943 smycols += ourlens[i]; 2944 svals += ourlens[i]; 2945 jj++; 2946 } 2947 } 2948 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2949 ierr = PetscFree(vals);CHKERRQ(ierr); 2950 ierr = PetscFree(mycols);CHKERRQ(ierr); 2951 ierr = PetscFree(rowners);CHKERRQ(ierr); 2952 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2953 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2954 PetscFunctionReturn(0); 2955 } 2956 2957 /* Not scalable because of ISAllGather() unless getting all columns. */ 2958 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2959 { 2960 PetscErrorCode ierr; 2961 IS iscol_local; 2962 PetscBool isstride; 2963 PetscMPIInt lisstride=0,gisstride; 2964 2965 PetscFunctionBegin; 2966 /* check if we are grabbing all columns*/ 2967 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 2968 2969 if (isstride) { 2970 PetscInt start,len,mstart,mlen; 2971 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 2972 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 2973 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 2974 if (mstart == start && mlen-mstart == len) lisstride = 1; 2975 } 2976 2977 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2978 if (gisstride) { 2979 PetscInt N; 2980 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 2981 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 2982 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 2983 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 2984 } else { 2985 PetscInt cbs; 2986 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 2987 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2988 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 2989 } 2990 2991 *isseq = iscol_local; 2992 PetscFunctionReturn(0); 2993 } 2994 2995 /* 2996 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 2997 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 2998 2999 Input Parameters: 3000 mat - matrix 3001 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3002 i.e., mat->rstart <= isrow[i] < mat->rend 3003 iscol - parallel 
column index set; its local indices are a subset of local columns of mat, 3004 i.e., mat->cstart <= iscol[i] < mat->cend 3005 Output Parameter: 3006 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3007 iscol_o - sequential column index set for retrieving mat->B 3008 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3009 */ 3010 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3011 { 3012 PetscErrorCode ierr; 3013 Vec x,cmap; 3014 const PetscInt *is_idx; 3015 PetscScalar *xarray,*cmaparray; 3016 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3017 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3018 Mat B=a->B; 3019 Vec lvec=a->lvec,lcmap; 3020 PetscInt i,cstart,cend,Bn=B->cmap->N; 3021 MPI_Comm comm; 3022 3023 PetscFunctionBegin; 3024 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3025 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3026 3027 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3028 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3029 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3030 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3031 3032 /* Get start indices */ 3033 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3034 isstart -= ncols; 3035 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3036 3037 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3038 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3039 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3040 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3041 for (i=0; i<ncols; i++) { 3042 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3043 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3044 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3045 } 3046 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3047 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3048 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3049 3050 /* Get iscol_d */ 3051 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3052 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3053 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3054 3055 /* Get isrow_d */ 3056 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3057 rstart = mat->rmap->rstart; 3058 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3059 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3060 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3061 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3062 3063 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3064 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3065 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3066 3067 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3068 ierr = VecScatterBegin(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3069 3070 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3071 3072 ierr = VecScatterEnd(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3073 ierr = VecScatterBegin(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3074 ierr = VecScatterEnd(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3075 3076 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3077 /* off-process column indices */ 3078 count = 0; 3079 ierr = 
PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3080 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3081 3082 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3083 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3084 for (i=0; i<Bn; i++) { 3085 if (PetscRealPart(xarray[i]) > -1.0) { 3086 idx[count] = i; /* local column index in off-diagonal part B */ 3087 cmap1[count++] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3088 } 3089 } 3090 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3091 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3092 3093 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3094 /* cannot ensure iscol_o has same blocksize as iscol! */ 3095 3096 ierr = PetscFree(idx);CHKERRQ(ierr); 3097 3098 *garray = cmap1; 3099 3100 ierr = VecDestroy(&x);CHKERRQ(ierr); 3101 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3102 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3103 PetscFunctionReturn(0); 3104 } 3105 3106 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3107 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3108 { 3109 PetscErrorCode ierr; 3110 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3111 Mat M = NULL; 3112 MPI_Comm comm; 3113 IS iscol_d,isrow_d,iscol_o; 3114 Mat Asub = NULL,Bsub = NULL; 3115 PetscInt n; 3116 3117 PetscFunctionBegin; 3118 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3119 3120 if (call == MAT_REUSE_MATRIX) { 3121 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3122 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3123 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3124 3125 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3126 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3127 3128 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3129 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3130 3131 /* Update diagonal and off-diagonal portions of submat */ 3132 asub = (Mat_MPIAIJ*)(*submat)->data; 3133 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3134 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3135 if (n) { 3136 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3137 } 3138 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3139 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3140 3141 } else { /* call == MAT_INITIAL_MATRIX) */ 3142 const PetscInt *garray; 3143 PetscInt BsubN; 3144 3145 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3146 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3147 3148 /* Create local submatrices Asub and Bsub */ 3149 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3150 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3151 3152 /* Create submatrix M */ 3153 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3154 3155 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3156 asub = (Mat_MPIAIJ*)M->data; 3157 3158 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3159 n = asub->B->cmap->N; 3160 if (BsubN > n) { 3161 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3162 const PetscInt *idx; 3163 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3164 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3165 3166 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3167 j = 0; 3168 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3169 for (i=0; i<n; i++) { 3170 if (j >= BsubN) break; 3171 while (subgarray[i] > garray[j]) j++; 3172 3173 if (subgarray[i] == garray[j]) { 3174 idx_new[i] = idx[j++]; 3175 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3176 } 3177 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3178 3179 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3180 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3181 3182 } else if (BsubN < n) { 3183 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3184 } 3185 3186 ierr = PetscFree(garray);CHKERRQ(ierr); 3187 *submat = M; 3188 3189 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3190 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3191 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3192 3193 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3194 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3195 3196 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3197 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3198 } 3199 PetscFunctionReturn(0); 3200 } 3201 3202 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3203 { 3204 PetscErrorCode ierr; 3205 IS iscol_local=NULL,isrow_d; 3206 PetscInt csize; 3207 PetscInt n,i,j,start,end; 3208 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3209 MPI_Comm comm; 3210 3211 PetscFunctionBegin; 3212 /* If isrow has same processor distribution as mat, 3213 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3214 if (call == MAT_REUSE_MATRIX) { 3215 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3216 if (isrow_d) { 3217 sameRowDist = PETSC_TRUE; 3218 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3219 } else { 3220 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3221 if (iscol_local) { 3222 sameRowDist = PETSC_TRUE; 3223 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3224 } 3225 } 3226 } else { 3227 /* Check if isrow has same processor distribution as mat */ 3228 sameDist[0] 
= PETSC_FALSE; 3229 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3230 if (!n) { 3231 sameDist[0] = PETSC_TRUE; 3232 } else { 3233 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3234 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3235 if (i >= start && j < end) { 3236 sameDist[0] = PETSC_TRUE; 3237 } 3238 } 3239 3240 /* Check if iscol has same processor distribution as mat */ 3241 sameDist[1] = PETSC_FALSE; 3242 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3243 if (!n) { 3244 sameDist[1] = PETSC_TRUE; 3245 } else { 3246 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3247 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3248 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3249 } 3250 3251 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3252 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3253 sameRowDist = tsameDist[0]; 3254 } 3255 3256 if (sameRowDist) { 3257 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3258 /* isrow and iscol have same processor distribution as mat */ 3259 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3260 PetscFunctionReturn(0); 3261 } else { /* sameRowDist */ 3262 /* isrow has same processor distribution as mat */ 3263 if (call == MAT_INITIAL_MATRIX) { 3264 PetscBool sorted; 3265 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3266 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3267 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3268 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3269 3270 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3271 if (sorted) { 3272 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3273 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3274 PetscFunctionReturn(0); 3275 } 3276 } else { /* call == MAT_REUSE_MATRIX */ 3277 IS iscol_sub; 3278 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3279 if (iscol_sub) { 3280 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3281 PetscFunctionReturn(0); 3282 } 3283 } 3284 } 3285 } 3286 3287 /* General case: iscol -> iscol_local which has global size of iscol */ 3288 if (call == MAT_REUSE_MATRIX) { 3289 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3290 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3291 } else { 3292 if (!iscol_local) { 3293 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3294 } 3295 } 3296 3297 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3298 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3299 3300 if (call == MAT_INITIAL_MATRIX) { 3301 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3302 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3303 } 3304 PetscFunctionReturn(0); 3305 } 3306 3307 /*@C 3308 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3309 and "off-diagonal" part of the matrix in CSR format. 3310 3311 Collective on MPI_Comm 3312 3313 Input Parameters: 3314 + comm - MPI communicator 3315 . 
A - "diagonal" portion of matrix 3316 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3317 - garray - global index of B columns 3318 3319 Output Parameter: 3320 . mat - the matrix, with input A as its local diagonal matrix 3321 Level: advanced 3322 3323 Notes: 3324 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3325 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3326 3327 .seealso: MatCreateMPIAIJWithSplitArrays() 3328 @*/ 3329 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3330 { 3331 PetscErrorCode ierr; 3332 Mat_MPIAIJ *maij; 3333 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3334 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3335 PetscScalar *oa=b->a; 3336 Mat Bnew; 3337 PetscInt m,n,N; 3338 3339 PetscFunctionBegin; 3340 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3341 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3342 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3343 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3344 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3345 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3346 3347 /* Get global columns of mat */ 3348 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3349 3350 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3351 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3352 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3353 maij = (Mat_MPIAIJ*)(*mat)->data; 3354 3355 (*mat)->preallocated = PETSC_TRUE; 3356 3357 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3358 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3359 3360 /* Set A as diagonal portion of *mat */ 3361 maij->A = A; 3362 3363 nz = oi[m]; 3364 for (i=0; i<nz; i++) { 3365 col = oj[i]; 3366 oj[i] = garray[col]; 3367 } 3368 3369 /* Set Bnew as off-diagonal portion of *mat */ 3370 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3371 bnew = (Mat_SeqAIJ*)Bnew->data; 3372 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3373 maij->B = Bnew; 3374 3375 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3376 3377 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3378 b->free_a = PETSC_FALSE; 3379 b->free_ij = PETSC_FALSE; 3380 ierr = MatDestroy(&B);CHKERRQ(ierr); 3381 3382 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3383 bnew->free_a = PETSC_TRUE; 3384 bnew->free_ij = PETSC_TRUE; 3385 3386 /* condense columns of maij->B */ 3387 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3388 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3389 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3390 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3391 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3392 PetscFunctionReturn(0); 3393 } 3394 3395 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3396 
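/*
   Usage sketch (illustrative only, not a routine defined in this file): code that has already
   built the sequential "diagonal" block Asub, the sequential "off-diagonal" block Bsub and the
   global column map garray -- as MatCreateSubMatrix_MPIAIJ_SameRowColDist() does above -- can
   assemble them into a parallel matrix with

       Mat C;
       ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&C);CHKERRQ(ierr);

   Afterwards Asub and Bsub belong to C and must not be used or destroyed by the caller, while
   garray is only read by the routine and may still be freed by the caller (see the PetscFree()
   call after MatCreateMPIAIJWithSeqAIJ() in MatCreateSubMatrix_MPIAIJ_SameRowColDist()).
*/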
3397 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3398 { 3399 PetscErrorCode ierr; 3400 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3401 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3402 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3403 Mat M,Msub,B=a->B; 3404 MatScalar *aa; 3405 Mat_SeqAIJ *aij; 3406 PetscInt *garray = a->garray,*colsub,Ncols; 3407 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3408 IS iscol_sub,iscmap; 3409 const PetscInt *is_idx,*cmap; 3410 PetscBool allcolumns=PETSC_FALSE; 3411 MPI_Comm comm; 3412 3413 PetscFunctionBegin; 3414 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3415 3416 if (call == MAT_REUSE_MATRIX) { 3417 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3418 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3419 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3420 3421 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3422 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3423 3424 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3425 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3426 3427 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3428 3429 } else { /* call == MAT_INITIAL_MATRIX) */ 3430 PetscBool flg; 3431 3432 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3433 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3434 3435 /* (1) iscol -> nonscalable iscol_local */ 3436 /* Check for special case: each processor gets entire matrix columns */ 3437 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3438 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3439 if (allcolumns) { 3440 iscol_sub = iscol_local; 3441 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3442 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3443 3444 } else { 3445 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3446 PetscInt *idx,*cmap1,k; 3447 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3448 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3449 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3450 count = 0; 3451 k = 0; 3452 for (i=0; i<Ncols; i++) { 3453 j = is_idx[i]; 3454 if (j >= cstart && j < cend) { 3455 /* diagonal part of mat */ 3456 idx[count] = j; 3457 cmap1[count++] = i; /* column index in submat */ 3458 } else if (Bn) { 3459 /* off-diagonal part of mat */ 3460 if (j == garray[k]) { 3461 idx[count] = j; 3462 cmap1[count++] = i; /* column index in submat */ 3463 } else if (j > garray[k]) { 3464 while (j > garray[k] && k < Bn-1) k++; 3465 if (j == garray[k]) { 3466 idx[count] = j; 3467 cmap1[count++] = i; /* column index in submat */ 3468 } 3469 } 3470 } 3471 } 3472 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3473 3474 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3475 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3476 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3477 3478 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3479 } 3480 3481 /* (3) Create sequential Msub */ 3482 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3483 } 3484 3485 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3486 aij = (Mat_SeqAIJ*)(Msub)->data; 3487 ii = aij->i; 3488 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3489 3490 /* 3491 m - number of local rows 3492 Ncols - number of columns (same on all processors) 3493 rstart - first row in new global matrix generated 3494 */ 3495 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3496 3497 if (call == MAT_INITIAL_MATRIX) { 3498 /* (4) Create parallel newmat */ 3499 PetscMPIInt rank,size; 3500 PetscInt csize; 3501 3502 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3503 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3504 3505 /* 3506 Determine the number of non-zeros in the diagonal and off-diagonal 3507 portions of the matrix in order to do correct preallocation 3508 */ 3509 3510 /* first get start and end of "diagonal" columns */ 3511 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3512 if (csize == PETSC_DECIDE) { 3513 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3514 if (mglobal == Ncols) { /* square matrix */ 3515 nlocal = m; 3516 } else { 3517 nlocal = Ncols/size + ((Ncols % size) > rank); 3518 } 3519 } else { 3520 nlocal = csize; 3521 } 3522 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3523 rstart = rend - nlocal; 3524 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3525 3526 /* next, compute all the lengths */ 3527 jj = aij->j; 3528 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3529 olens = dlens + m; 3530 for (i=0; i<m; i++) { 3531 jend = ii[i+1] - ii[i]; 3532 olen = 0; 3533 dlen = 0; 3534 for (j=0; j<jend; j++) { 3535 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3536 else dlen++; 3537 jj++; 3538 } 3539 olens[i] = olen; 3540 dlens[i] = dlen; 3541 } 3542 3543 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3544 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3545 3546 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3547 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3548 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3549 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3550 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3551 ierr = PetscFree(dlens);CHKERRQ(ierr); 3552 3553 } else { /* call == MAT_REUSE_MATRIX */ 3554 M = *newmat; 3555 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3556 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3557 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3558 /* 3559 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3560 rather than the slower MatSetValues(). 3561 */ 3562 M->was_assembled = PETSC_TRUE; 3563 M->assembled = PETSC_FALSE; 3564 } 3565 3566 /* (5) Set values of Msub to *newmat */ 3567 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3568 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3569 3570 jj = aij->j; 3571 aa = aij->a; 3572 for (i=0; i<m; i++) { 3573 row = rstart + i; 3574 nz = ii[i+1] - ii[i]; 3575 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3576 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3577 jj += nz; aa += nz; 3578 } 3579 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3580 3581 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3582 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3583 3584 ierr = PetscFree(colsub);CHKERRQ(ierr); 3585 3586 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3587 if (call == MAT_INITIAL_MATRIX) { 3588 *newmat = M; 3589 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3590 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3591 3592 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3593 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3594 3595 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3596 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3597 3598 if (iscol_local) { 3599 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3600 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3601 } 3602 } 3603 PetscFunctionReturn(0); 3604 } 3605 3606 /* 3607 Not great since it makes two copies of the submatrix, first an SeqAIJ 3608 in local and then by concatenating the local matrices the end result. 3609 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3610 3611 Note: This requires a sequential iscol with all indices. 
3612 */ 3613 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3614 { 3615 PetscErrorCode ierr; 3616 PetscMPIInt rank,size; 3617 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3618 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3619 Mat M,Mreuse; 3620 MatScalar *aa,*vwork; 3621 MPI_Comm comm; 3622 Mat_SeqAIJ *aij; 3623 PetscBool colflag,allcolumns=PETSC_FALSE; 3624 3625 PetscFunctionBegin; 3626 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3627 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3628 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3629 3630 /* Check for special case: each processor gets entire matrix columns */ 3631 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3632 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3633 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3634 3635 if (call == MAT_REUSE_MATRIX) { 3636 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3637 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3638 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3639 } else { 3640 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3641 } 3642 3643 /* 3644 m - number of local rows 3645 n - number of columns (same on all processors) 3646 rstart - first row in new global matrix generated 3647 */ 3648 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3649 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3650 if (call == MAT_INITIAL_MATRIX) { 3651 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3652 ii = aij->i; 3653 jj = aij->j; 3654 3655 /* 3656 Determine the number of non-zeros in the diagonal and off-diagonal 3657 portions of the matrix in order to do correct preallocation 3658 */ 3659 3660 /* first get start and end of "diagonal" columns */ 3661 if (csize == PETSC_DECIDE) { 3662 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3663 if (mglobal == n) { /* square matrix */ 3664 nlocal = m; 3665 } else { 3666 nlocal = n/size + ((n % size) > rank); 3667 } 3668 } else { 3669 nlocal = csize; 3670 } 3671 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3672 rstart = rend - nlocal; 3673 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3674 3675 /* next, compute all the lengths */ 3676 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3677 olens = dlens + m; 3678 for (i=0; i<m; i++) { 3679 jend = ii[i+1] - ii[i]; 3680 olen = 0; 3681 dlen = 0; 3682 for (j=0; j<jend; j++) { 3683 if (*jj < rstart || *jj >= rend) olen++; 3684 else dlen++; 3685 jj++; 3686 } 3687 olens[i] = olen; 3688 dlens[i] = dlen; 3689 } 3690 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3691 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3692 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3693 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3694 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3695 ierr = PetscFree(dlens);CHKERRQ(ierr); 3696 } else { 3697 PetscInt ml,nl; 3698 3699 M = *newmat; 3700 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3701 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3702 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3703 /* 3704 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3705 rather than the slower MatSetValues(). 3706 */ 3707 M->was_assembled = PETSC_TRUE; 3708 M->assembled = PETSC_FALSE; 3709 } 3710 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3711 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3712 ii = aij->i; 3713 jj = aij->j; 3714 aa = aij->a; 3715 for (i=0; i<m; i++) { 3716 row = rstart + i; 3717 nz = ii[i+1] - ii[i]; 3718 cwork = jj; jj += nz; 3719 vwork = aa; aa += nz; 3720 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3721 } 3722 3723 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3724 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3725 *newmat = M; 3726 3727 /* save the submatrix used on this process for the next reuse request */ 3728 if (call == MAT_INITIAL_MATRIX) { 3729 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3730 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3731 } 3732 PetscFunctionReturn(0); 3733 } 3734 3735 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3736 { 3737 PetscInt m,cstart, cend,j,nnz,i,d; 3738 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3739 const PetscInt *JJ; 3740 PetscScalar *values; 3741 PetscErrorCode ierr; 3742 PetscBool nooffprocentries; 3743 3744 PetscFunctionBegin; 3745 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3746 3747 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3748 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3749 m = B->rmap->n; 3750 cstart = B->cmap->rstart; 3751 cend = B->cmap->rend; 3752 rstart = B->rmap->rstart; 3753 3754 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3755 3756 #if defined(PETSC_USE_DEBUG) 3757 for (i=0; i<m; i++) { 3758 nnz = Ii[i+1]- Ii[i]; 3759 JJ = J + Ii[i]; 3760 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3761 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]); 3762 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3763 } 3764 #endif 3765 3766 for (i=0; i<m; i++) { 3767 nnz = Ii[i+1]- Ii[i]; 3768 JJ = J + Ii[i]; 3769 nnz_max = PetscMax(nnz_max,nnz); 3770 d = 0; 3771 for (j=0; j<nnz; j++) { 3772 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3773 } 3774 d_nnz[i] = d; 3775 o_nnz[i] = nnz - d; 3776 } 3777 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3778 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3779 3780 if (v) values = (PetscScalar*)v; 3781 else { 3782 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3783 } 3784 3785 for (i=0; i<m; i++) { 3786 ii = i + rstart; 3787 nnz = Ii[i+1]- Ii[i]; 3788 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3789 } 3790 nooffprocentries = B->nooffprocentries; 3791 B->nooffprocentries = PETSC_TRUE; 3792 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3793 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3794 B->nooffprocentries = nooffprocentries; 3795 3796 if (!v) { 3797 ierr = PetscFree(values);CHKERRQ(ierr); 3798 } 3799 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3800 PetscFunctionReturn(0); 3801 } 3802 3803 /*@ 3804 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3805 (the default parallel PETSc format). 3806 3807 Collective on MPI_Comm 3808 3809 Input Parameters: 3810 + B - the matrix 3811 . i - the indices into j for the start of each local row (starts with zero) 3812 . j - the column indices for each local row (starts with zero) 3813 - v - optional values in the matrix 3814 3815 Level: developer 3816 3817 Notes: 3818 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3819 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3820 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3821 3822 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3823 3824 The format which is used for the sparse matrix input, is equivalent to a 3825 row-major ordering.. i.e for the following matrix, the input data expected is 3826 as shown 3827 3828 $ 1 0 0 3829 $ 2 0 3 P0 3830 $ ------- 3831 $ 4 5 6 P1 3832 $ 3833 $ Process0 [P0]: rows_owned=[0,1] 3834 $ i = {0,1,3} [size = nrow+1 = 2+1] 3835 $ j = {0,0,2} [size = 3] 3836 $ v = {1,2,3} [size = 3] 3837 $ 3838 $ Process1 [P1]: rows_owned=[2] 3839 $ i = {0,3} [size = nrow+1 = 1+1] 3840 $ j = {0,1,2} [size = 3] 3841 $ v = {4,5,6} [size = 3] 3842 3843 .keywords: matrix, aij, compressed row, sparse, parallel 3844 3845 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3846 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3847 @*/ 3848 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3849 { 3850 PetscErrorCode ierr; 3851 3852 PetscFunctionBegin; 3853 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3854 PetscFunctionReturn(0); 3855 } 3856 3857 /*@C 3858 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3859 (the default parallel PETSc format). For good matrix assembly performance 3860 the user should preallocate the matrix storage by setting the parameters 3861 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3862 performance can be increased by more than a factor of 50. 3863 3864 Collective on MPI_Comm 3865 3866 Input Parameters: 3867 + B - the matrix 3868 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3869 (same value is used for all local rows) 3870 . d_nnz - array containing the number of nonzeros in the various rows of the 3871 DIAGONAL portion of the local submatrix (possibly different for each row) 3872 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3873 The size of this array is equal to the number of local rows, i.e 'm'. 
 3874 For matrices that will be factored, you must leave room for (and set) 3875 the diagonal entry even if it is zero. 3876 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3877 submatrix (same value is used for all local rows). 3878 - o_nnz - array containing the number of nonzeros in the various rows of the 3879 OFF-DIAGONAL portion of the local submatrix (possibly different for 3880 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3881 structure. The size of this array is equal to the number 3882 of local rows, i.e. 'm'. 3883 3884 If the *_nnz parameter is given then the *_nz parameter is ignored. 3885 3886 The AIJ format (also called the Yale sparse matrix format or 3887 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3888 storage. The stored row and column indices begin with zero. 3889 See Users-Manual: ch_mat for details. 3890 3891 The parallel matrix is partitioned such that the first m0 rows belong to 3892 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3893 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 3894 3895 The DIAGONAL portion of the local submatrix of a processor can be defined 3896 as the submatrix which is obtained by extracting the part corresponding to 3897 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3898 first row that belongs to the processor, r2 is the last row belonging to 3899 this processor, and c1-c2 is the range of indices of the local part of a 3900 vector suitable for applying the matrix to. This is an m x n matrix. In the 3901 common case of a square matrix, the row and column ranges are the same and 3902 the DIAGONAL part is also square. The remaining portion of the local 3903 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 3904 3905 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 3906 3907 You can call MatGetInfo() to get information on how effective the preallocation was; 3908 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3909 You can also run with the option -info and look for messages with the string 3910 malloc in them to see if additional memory allocation was needed. 3911 3912 Example usage: 3913 3914 Consider the following 8x8 matrix with 34 non-zero values, that is 3915 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3916 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3917 as follows: 3918 3919 .vb 3920 1 2 0 | 0 3 0 | 0 4 3921 Proc0 0 5 6 | 7 0 0 | 8 0 3922 9 0 10 | 11 0 0 | 12 0 3923 ------------------------------------- 3924 13 0 14 | 15 16 17 | 0 0 3925 Proc1 0 18 0 | 19 20 21 | 0 0 3926 0 0 0 | 22 23 0 | 24 0 3927 ------------------------------------- 3928 Proc2 25 26 27 | 0 0 28 | 29 0 3929 30 0 0 | 31 32 33 | 0 34 3930 .ve 3931 3932 This can be represented as a collection of submatrices as: 3933 3934 .vb 3935 A B C 3936 D E F 3937 G H I 3938 .ve 3939 3940 Where the submatrices A,B,C are owned by proc0, D,E,F are 3941 owned by proc1, G,H,I are owned by proc2. 3942 3943 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3944 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3945 The 'M','N' parameters are 8,8, and have the same values on all procs. 3946 3947 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3948 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3949 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
 3950 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3951 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3952 matrix, and [DF] as another SeqAIJ matrix. 3953 3954 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3955 allocated for every row of the local diagonal submatrix, and o_nz 3956 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3957 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over 3958 the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3959 In this case, the values of d_nz,o_nz are: 3960 .vb 3961 proc0 : d_nz = 2, o_nz = 2 3962 proc1 : d_nz = 3, o_nz = 2 3963 proc2 : d_nz = 1, o_nz = 4 3964 .ve 3965 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3966 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3967 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3968 34 values. 3969 3970 When the d_nnz, o_nnz parameters are specified, the storage is specified 3971 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3972 In the above case the values for d_nnz,o_nnz are: 3973 .vb 3974 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3975 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3976 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3977 .ve 3978 Here the space allocated is the sum of all the above values, i.e. 34, and 3979 hence preallocation is perfect. 3980 3981 Level: intermediate 3982 3983 .keywords: matrix, aij, compressed row, sparse, parallel 3984 3985 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3986 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 3987 @*/ 3988 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3989 { 3990 PetscErrorCode ierr; 3991 3992 PetscFunctionBegin; 3993 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3994 PetscValidType(B,1); 3995 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3996 PetscFunctionReturn(0); 3997 } 3998 3999 /*@ 4000 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4001 CSR format. 4002 4003 Collective on MPI_Comm 4004 4005 Input Parameters: 4006 + comm - MPI communicator 4007 . m - number of local rows (Cannot be PETSC_DECIDE) 4008 . n - This value should be the same as the local size used in creating the 4009 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4010 calculated if N is given) For square matrices n is almost always m. 4011 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4012 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4013 . i - row indices 4014 . j - column indices 4015 - a - matrix values 4016 4017 Output Parameter: 4018 . mat - the matrix 4019 4020 Level: intermediate 4021 4022 Notes: 4023 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4024 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4025 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4026 4027 The i and j indices are 0 based, and the i indices are offsets into the local j array.
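   For instance (a minimal sketch, not taken from a PETSc example; error checking omitted), in the
   two-process 3x3 layout shown below process 0 would pass its two local rows as

.vb
      PetscInt    i[] = {0,1,3};
      PetscInt    j[] = {0,0,2};
      PetscScalar v[] = {1.0,2.0,3.0};
      Mat         A;

      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve

   and process 1 would make the matching collective call with m = 1, i = {0,3}, j = {0,1,2}, v = {4,5,6}.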
4028 4029 The format which is used for the sparse matrix input, is equivalent to a 4030 row-major ordering.. i.e for the following matrix, the input data expected is 4031 as shown 4032 4033 $ 1 0 0 4034 $ 2 0 3 P0 4035 $ ------- 4036 $ 4 5 6 P1 4037 $ 4038 $ Process0 [P0]: rows_owned=[0,1] 4039 $ i = {0,1,3} [size = nrow+1 = 2+1] 4040 $ j = {0,0,2} [size = 3] 4041 $ v = {1,2,3} [size = 3] 4042 $ 4043 $ Process1 [P1]: rows_owned=[2] 4044 $ i = {0,3} [size = nrow+1 = 1+1] 4045 $ j = {0,1,2} [size = 3] 4046 $ v = {4,5,6} [size = 3] 4047 4048 .keywords: matrix, aij, compressed row, sparse, parallel 4049 4050 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4051 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4052 @*/ 4053 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4054 { 4055 PetscErrorCode ierr; 4056 4057 PetscFunctionBegin; 4058 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4059 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4060 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4061 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4062 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4063 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4064 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4065 PetscFunctionReturn(0); 4066 } 4067 4068 /*@C 4069 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4070 (the default parallel PETSc format). For good matrix assembly performance 4071 the user should preallocate the matrix storage by setting the parameters 4072 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4073 performance can be increased by more than a factor of 50. 4074 4075 Collective on MPI_Comm 4076 4077 Input Parameters: 4078 + comm - MPI communicator 4079 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4080 This value should be the same as the local size used in creating the 4081 y vector for the matrix-vector product y = Ax. 4082 . n - This value should be the same as the local size used in creating the 4083 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4084 calculated if N is given) For square matrices n is almost always m. 4085 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4086 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4087 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4088 (same value is used for all local rows) 4089 . d_nnz - array containing the number of nonzeros in the various rows of the 4090 DIAGONAL portion of the local submatrix (possibly different for each row) 4091 or NULL, if d_nz is used to specify the nonzero structure. 4092 The size of this array is equal to the number of local rows, i.e 'm'. 4093 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4094 submatrix (same value is used for all local rows). 4095 - o_nnz - array containing the number of nonzeros in the various rows of the 4096 OFF-DIAGONAL portion of the local submatrix (possibly different for 4097 each row) or NULL, if o_nz is used to specify the nonzero 4098 structure. 
The size of this array is equal to the number 4099 of local rows, i.e. 'm'. 4100 4101 Output Parameter: 4102 . A - the matrix 4103 4104 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4105 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4106 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4107 4108 Notes: 4109 If the *_nnz parameter is given then the *_nz parameter is ignored. 4110 4111 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4112 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4113 storage requirements for this matrix. 4114 4115 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4116 processor then it must be used on all processors that share the object for 4117 that argument. 4118 4119 The user MUST specify either the local or global matrix dimensions 4120 (possibly both). 4121 4122 The parallel matrix is partitioned across processors such that the 4123 first m0 rows belong to process 0, the next m1 rows belong to 4124 process 1, the next m2 rows belong to process 2, etc., where 4125 m0,m1,m2,... are the input parameter 'm', i.e. each processor stores 4126 values corresponding to an [m x N] submatrix. 4127 4128 The columns are logically partitioned with the n0 columns belonging 4129 to the 0th partition, the next n1 columns belonging to the next 4130 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4131 4132 The DIAGONAL portion of the local submatrix on any given processor 4133 is the submatrix corresponding to the rows and columns m,n 4134 owned by the given processor, i.e. the diagonal matrix on 4135 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4136 etc. The remaining portion of the local submatrix [m x (N-n)] 4137 constitutes the OFF-DIAGONAL portion. The example below better 4138 illustrates this concept. 4139 4140 For a square global matrix we define each processor's diagonal portion 4141 to be its local rows and the corresponding columns (a square submatrix); 4142 each processor's off-diagonal portion encompasses the remainder of the 4143 local matrix (a rectangular submatrix). 4144 4145 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 4146 4147 When calling this routine with a single process communicator, a matrix of 4148 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4149 type of communicator, use the construction mechanism 4150 .vb 4151 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4152 .ve 4153 4158 4159 By default, this format uses inodes (identical nodes) when possible. 4160 We search for consecutive rows with the same nonzero structure, thereby 4161 reusing matrix information to achieve increased efficiency. 4162 4163 Options Database Keys: 4164 + -mat_no_inode - Do not use inodes 4165 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4166 4169 Example usage: 4170 4171 Consider the following 8x8 matrix with 34 non-zero values, that is 4172 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4173 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 4174 as follows: 4175 .vb 4176 1 2 0 | 0 3 0 | 0 4 4177 Proc0 0 5 6 | 7 0 0 | 8 0 4178 9 0 10 | 11 0 0 | 12 0 4179 ------------------------------------- 4180 13 0 14 | 15 16 17 | 0 0 4181 Proc1 0 18 0 | 19 20 21 | 0 0 4182 0 0 0 | 22 23 0 | 24 0 4183 ------------------------------------- 4184 Proc2 25 26 27 | 0 0 28 | 29 0 4185 30 0 0 | 31 32 33 | 0 34 4186 .ve 4187 4188 This can be represented as a collection of submatrices as 4189 4190 .vb 4191 A B C 4192 D E F 4193 G H I 4194 .ve 4195 4196 Where the submatrices A,B,C are owned by proc0, D,E,F are 4197 owned by proc1, G,H,I are owned by proc2. 4198 4199 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4200 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4201 The 'M','N' parameters are 8,8, and have the same values on all procs. 4202 4203 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4204 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4205 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4206 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4207 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4208 matrix, and [DF] as another SeqAIJ matrix. 4209 4210 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4211 allocated for every row of the local diagonal submatrix, and o_nz 4212 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4213 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over 4214 the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4215 In this case, the values of d_nz,o_nz are 4216 .vb 4217 proc0 : d_nz = 2, o_nz = 2 4218 proc1 : d_nz = 3, o_nz = 2 4219 proc2 : d_nz = 1, o_nz = 4 4220 .ve 4221 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4222 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4223 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4224 34 values. 4225 4226 When the d_nnz, o_nnz parameters are specified, the storage is specified 4227 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4228 In the above case the values for d_nnz,o_nnz are 4229 .vb 4230 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4231 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4232 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4233 .ve 4234 Here the space allocated is the sum of all the above values, i.e. 34, and 4235 hence preallocation is perfect.
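   As a concrete call (a sketch based on the numbers above; error checking omitted), process 0
   could create its share of the 8x8 matrix either with the exact per-row counts

.vb
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      Mat      A;
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve

   or with the single per-row upper bounds

.vb
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,2,NULL,2,NULL,&A);CHKERRQ(ierr);
.ve

   Each process passes its own m, n, d_nz/d_nnz and o_nz/o_nnz; since the routine is collective,
   all processes in the communicator must make the call.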
4237 4238 Level: intermediate 4239 4240 .keywords: matrix, aij, compressed row, sparse, parallel 4241 4242 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4243 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4244 @*/ 4245 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4246 { 4247 PetscErrorCode ierr; 4248 PetscMPIInt size; 4249 4250 PetscFunctionBegin; 4251 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4252 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4253 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4254 if (size > 1) { 4255 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4256 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4257 } else { 4258 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4259 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4260 } 4261 PetscFunctionReturn(0); 4262 } 4263 4264 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4265 { 4266 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4267 PetscBool flg; 4268 PetscErrorCode ierr; 4269 4270 PetscFunctionBegin; 4271 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4272 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4273 if (Ad) *Ad = a->A; 4274 if (Ao) *Ao = a->B; 4275 if (colmap) *colmap = a->garray; 4276 PetscFunctionReturn(0); 4277 } 4278 4279 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4280 { 4281 PetscErrorCode ierr; 4282 PetscInt m,N,i,rstart,nnz,Ii; 4283 PetscInt *indx; 4284 PetscScalar *values; 4285 4286 PetscFunctionBegin; 4287 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4288 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4289 PetscInt *dnz,*onz,sum,bs,cbs; 4290 4291 if (n == PETSC_DECIDE) { 4292 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4293 } 4294 /* Check sum(n) = N */ 4295 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4296 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4297 4298 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4299 rstart -= m; 4300 4301 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4302 for (i=0; i<m; i++) { 4303 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4304 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4305 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4306 } 4307 4308 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4309 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4310 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4311 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4312 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4313 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4314 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4315 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4316 } 4317 4318 /* numeric phase */ 4319 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4320 for (i=0; i<m; i++) { 4321 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4322 Ii = i + rstart; 4323 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
4324 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4325 } 4326 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4327 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4328 PetscFunctionReturn(0); 4329 } 4330 4331 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4332 { 4333 PetscErrorCode ierr; 4334 PetscMPIInt rank; 4335 PetscInt m,N,i,rstart,nnz; 4336 size_t len; 4337 const PetscInt *indx; 4338 PetscViewer out; 4339 char *name; 4340 Mat B; 4341 const PetscScalar *values; 4342 4343 PetscFunctionBegin; 4344 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4345 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4346 /* Should this be the type of the diagonal block of A? */ 4347 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4348 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4349 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4350 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4351 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4352 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4353 for (i=0; i<m; i++) { 4354 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4355 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4356 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4357 } 4358 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4359 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4360 4361 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4362 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4363 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4364 sprintf(name,"%s.%d",outfile,rank); 4365 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4366 ierr = PetscFree(name);CHKERRQ(ierr); 4367 ierr = MatView(B,out);CHKERRQ(ierr); 4368 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4369 ierr = MatDestroy(&B);CHKERRQ(ierr); 4370 PetscFunctionReturn(0); 4371 } 4372 4373 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4374 { 4375 PetscErrorCode ierr; 4376 Mat_Merge_SeqsToMPI *merge; 4377 PetscContainer container; 4378 4379 PetscFunctionBegin; 4380 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4381 if (container) { 4382 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4383 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4384 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4385 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4386 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4387 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4388 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4389 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4390 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4391 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4392 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4393 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4394 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4395 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4396 ierr = PetscFree(merge);CHKERRQ(ierr); 4397 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4398 } 4399 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4400 PetscFunctionReturn(0); 4401 } 4402 4403 #include <../src/mat/utils/freespace.h> 4404 #include <petscbt.h> 4405 4406 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4407 { 4408 PetscErrorCode ierr; 4409 MPI_Comm comm; 4410 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4411 PetscMPIInt 
size,rank,taga,*len_s; 4412 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4413 PetscInt proc,m; 4414 PetscInt **buf_ri,**buf_rj; 4415 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4416 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4417 MPI_Request *s_waits,*r_waits; 4418 MPI_Status *status; 4419 MatScalar *aa=a->a; 4420 MatScalar **abuf_r,*ba_i; 4421 Mat_Merge_SeqsToMPI *merge; 4422 PetscContainer container; 4423 4424 PetscFunctionBegin; 4425 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4426 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4427 4428 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4429 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4430 4431 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4432 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4433 4434 bi = merge->bi; 4435 bj = merge->bj; 4436 buf_ri = merge->buf_ri; 4437 buf_rj = merge->buf_rj; 4438 4439 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4440 owners = merge->rowmap->range; 4441 len_s = merge->len_s; 4442 4443 /* send and recv matrix values */ 4444 /*-----------------------------*/ 4445 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4446 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4447 4448 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4449 for (proc=0,k=0; proc<size; proc++) { 4450 if (!len_s[proc]) continue; 4451 i = owners[proc]; 4452 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4453 k++; 4454 } 4455 4456 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4457 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4458 ierr = PetscFree(status);CHKERRQ(ierr); 4459 4460 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4461 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4462 4463 /* insert mat values of mpimat */ 4464 /*----------------------------*/ 4465 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4466 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4467 4468 for (k=0; k<merge->nrecv; k++) { 4469 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4470 nrows = *(buf_ri_k[k]); 4471 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4472 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4473 } 4474 4475 /* set values of ba */ 4476 m = merge->rowmap->n; 4477 for (i=0; i<m; i++) { 4478 arow = owners[rank] + i; 4479 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4480 bnzi = bi[i+1] - bi[i]; 4481 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4482 4483 /* add local non-zero vals of this proc's seqmat into ba */ 4484 anzi = ai[arow+1] - ai[arow]; 4485 aj = a->j + ai[arow]; 4486 aa = a->a + ai[arow]; 4487 nextaj = 0; 4488 for (j=0; nextaj<anzi; j++) { 4489 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4490 ba_i[j] += aa[nextaj++]; 4491 } 4492 } 4493 4494 /* add received vals into ba */ 4495 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4496 /* i-th row */ 4497 if (i == *nextrow[k]) { 4498 anzi = *(nextai[k]+1) - *nextai[k]; 4499 aj = buf_rj[k] + *(nextai[k]); 4500 aa = abuf_r[k] + *(nextai[k]); 4501 nextaj = 0; 4502 for (j=0; nextaj<anzi; j++) { 4503 if (*(bj_i + j) == aj[nextaj]) { /* bcol 
== acol */ 4504 ba_i[j] += aa[nextaj++]; 4505 } 4506 } 4507 nextrow[k]++; nextai[k]++; 4508 } 4509 } 4510 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4511 } 4512 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4513 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4514 4515 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4516 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4517 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4518 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4519 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4520 PetscFunctionReturn(0); 4521 } 4522 4523 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4524 { 4525 PetscErrorCode ierr; 4526 Mat B_mpi; 4527 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4528 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4529 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4530 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4531 PetscInt len,proc,*dnz,*onz,bs,cbs; 4532 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4533 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4534 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4535 MPI_Status *status; 4536 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4537 PetscBT lnkbt; 4538 Mat_Merge_SeqsToMPI *merge; 4539 PetscContainer container; 4540 4541 PetscFunctionBegin; 4542 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4543 4544 /* make sure it is a PETSc comm */ 4545 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4546 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4547 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4548 4549 ierr = PetscNew(&merge);CHKERRQ(ierr); 4550 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4551 4552 /* determine row ownership */ 4553 /*---------------------------------------------------------*/ 4554 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4555 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4556 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4557 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4558 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4559 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4560 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4561 4562 m = merge->rowmap->n; 4563 owners = merge->rowmap->range; 4564 4565 /* determine the number of messages to send, their lengths */ 4566 /*---------------------------------------------------------*/ 4567 len_s = merge->len_s; 4568 4569 len = 0; /* length of buf_si[] */ 4570 merge->nsend = 0; 4571 for (proc=0; proc<size; proc++) { 4572 len_si[proc] = 0; 4573 if (proc == rank) { 4574 len_s[proc] = 0; 4575 } else { 4576 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4577 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4578 } 4579 if (len_s[proc]) { 4580 merge->nsend++; 4581 nrows = 0; 4582 for (i=owners[proc]; i<owners[proc+1]; i++) { 4583 if (ai[i+1] > ai[i]) nrows++; 4584 } 4585 len_si[proc] = 2*(nrows+1); 4586 len += len_si[proc]; 4587 } 4588 } 4589 4590 /* determine the number and length of messages to receive for ij-structure */ 4591 /*-------------------------------------------------------------------------*/ 4592 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4593 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4594 4595 /* post the Irecv of j-structure */ 4596 /*-------------------------------*/ 4597 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4598 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4599 4600 /* post the Isend of j-structure */ 4601 /*--------------------------------*/ 4602 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4603 4604 for (proc=0, k=0; proc<size; proc++) { 4605 if (!len_s[proc]) continue; 4606 i = owners[proc]; 4607 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4608 k++; 4609 } 4610 4611 /* receives and sends of j-structure are complete */ 4612 /*------------------------------------------------*/ 4613 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4614 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4615 4616 /* send and recv i-structure */ 4617 /*---------------------------*/ 4618 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4619 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4620 4621 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4622 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4623 for (proc=0,k=0; proc<size; proc++) { 4624 if (!len_s[proc]) continue; 4625 /* form outgoing message for i-structure: 4626 buf_si[0]: nrows to be sent 4627 [1:nrows]: row index (global) 4628 [nrows+1:2*nrows+1]: i-structure index 4629 */ 4630 /*-------------------------------------------*/ 4631 nrows = len_si[proc]/2 - 1; 4632 buf_si_i = buf_si + nrows+1; 4633 buf_si[0] = nrows; 4634 buf_si_i[0] = 0; 4635 nrows = 0; 4636 for (i=owners[proc]; i<owners[proc+1]; i++) { 4637 anzi = ai[i+1] - ai[i]; 4638 if (anzi) { 4639 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4640 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4641 nrows++; 4642 } 4643 } 4644 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4645 k++; 4646 buf_si += len_si[proc]; 4647 } 4648 4649 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4650 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4651 4652 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4653 for (i=0; i<merge->nrecv; i++) { 4654 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4655 } 4656 4657 ierr = PetscFree(len_si);CHKERRQ(ierr); 4658 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4659 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4660 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4661 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4662 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4663 ierr = PetscFree(status);CHKERRQ(ierr); 4664 4665 /* compute a local seq matrix in each processor */ 4666 /*----------------------------------------------*/ 4667 /* allocate bi array and free space for accumulating nonzero column info */ 4668 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4669 bi[0] = 0; 4670 4671 /* create and initialize a linked list */ 4672 nlnk = N+1; 4673 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4674 4675 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4676 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4677 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4678 4679 current_space = free_space; 4680 4681 /* determine symbolic info for each local row */ 4682 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4683 4684 for (k=0; k<merge->nrecv; k++) { 4685 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4686 nrows = *buf_ri_k[k]; 4687 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4688 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4689 } 4690 4691 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4692 len = 0; 4693 for (i=0; i<m; i++) { 4694 bnzi = 0; 4695 /* add local non-zero cols of this proc's seqmat into lnk */ 4696 arow = owners[rank] + i; 4697 anzi = ai[arow+1] - ai[arow]; 4698 aj = a->j + ai[arow]; 4699 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4700 bnzi += nlnk; 4701 /* add received col data into lnk */ 4702 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4703 if (i == *nextrow[k]) { /* i-th row */ 4704 anzi = *(nextai[k]+1) - *nextai[k]; 4705 aj = buf_rj[k] + *nextai[k]; 4706 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4707 bnzi += nlnk; 4708 nextrow[k]++; nextai[k]++; 4709 } 4710 } 4711 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4712 4713 /* if free space is not available, make more free space */ 4714 if (current_space->local_remaining<bnzi) { 4715 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4716 nspacedouble++; 4717 } 4718 /* copy data into free space, then initialize lnk */ 4719 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4720 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4721 4722 current_space->array += bnzi; 4723 current_space->local_used += bnzi; 4724 current_space->local_remaining -= bnzi; 4725 4726 bi[i+1] = bi[i] + bnzi; 4727 } 4728 4729 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4730 4731 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4732 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4733 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4734 4735 /* create symbolic parallel matrix B_mpi */ 4736 /*---------------------------------------*/ 4737 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4738 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4739 if (n==PETSC_DECIDE) { 4740 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4741 } else { 4742 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4743 } 4744 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4745 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4746 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4747 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4748 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4749 4750 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4751 B_mpi->assembled = PETSC_FALSE; 4752 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4753 merge->bi = bi; 4754 merge->bj = bj; 4755 merge->buf_ri = buf_ri; 4756 merge->buf_rj = buf_rj; 4757 merge->coi = NULL; 4758 merge->coj = NULL; 4759 merge->owners_co = NULL; 4760 4761 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4762 4763 /* attach the 
supporting struct to B_mpi for reuse */ 4764 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4765 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4766 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4767 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4768 *mpimat = B_mpi; 4769 4770 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4771 PetscFunctionReturn(0); 4772 } 4773 4774 /*@C 4775 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4776 matrices from each processor 4777 4778 Collective on MPI_Comm 4779 4780 Input Parameters: 4781 + comm - the communicators the parallel matrix will live on 4782 . seqmat - the input sequential matrices 4783 . m - number of local rows (or PETSC_DECIDE) 4784 . n - number of local columns (or PETSC_DECIDE) 4785 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4786 4787 Output Parameter: 4788 . mpimat - the parallel matrix generated 4789 4790 Level: advanced 4791 4792 Notes: 4793 The dimensions of the sequential matrix in each processor MUST be the same. 4794 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4795 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4796 @*/ 4797 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4798 { 4799 PetscErrorCode ierr; 4800 PetscMPIInt size; 4801 4802 PetscFunctionBegin; 4803 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4804 if (size == 1) { 4805 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4806 if (scall == MAT_INITIAL_MATRIX) { 4807 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4808 } else { 4809 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4810 } 4811 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4812 PetscFunctionReturn(0); 4813 } 4814 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4815 if (scall == MAT_INITIAL_MATRIX) { 4816 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4817 } 4818 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4819 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4820 PetscFunctionReturn(0); 4821 } 4822 4823 /*@ 4824 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4825 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4826 with MatGetSize() 4827 4828 Not Collective 4829 4830 Input Parameters: 4831 + A - the matrix 4832 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4833 4834 Output Parameter: 4835 . 
A_loc - the local sequential matrix generated 4836 4837 Level: developer 4838 4839 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4840 4841 @*/ 4842 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4843 { 4844 PetscErrorCode ierr; 4845 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4846 Mat_SeqAIJ *mat,*a,*b; 4847 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4848 MatScalar *aa,*ba,*cam; 4849 PetscScalar *ca; 4850 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4851 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4852 PetscBool match; 4853 MPI_Comm comm; 4854 PetscMPIInt size; 4855 4856 PetscFunctionBegin; 4857 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4858 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4859 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4860 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4861 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4862 4863 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4864 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4865 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4866 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4867 aa = a->a; ba = b->a; 4868 if (scall == MAT_INITIAL_MATRIX) { 4869 if (size == 1) { 4870 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4871 PetscFunctionReturn(0); 4872 } 4873 4874 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4875 ci[0] = 0; 4876 for (i=0; i<am; i++) { 4877 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4878 } 4879 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4880 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4881 k = 0; 4882 for (i=0; i<am; i++) { 4883 ncols_o = bi[i+1] - bi[i]; 4884 ncols_d = ai[i+1] - ai[i]; 4885 /* off-diagonal portion of A */ 4886 for (jo=0; jo<ncols_o; jo++) { 4887 col = cmap[*bj]; 4888 if (col >= cstart) break; 4889 cj[k] = col; bj++; 4890 ca[k++] = *ba++; 4891 } 4892 /* diagonal portion of A */ 4893 for (j=0; j<ncols_d; j++) { 4894 cj[k] = cstart + *aj++; 4895 ca[k++] = *aa++; 4896 } 4897 /* off-diagonal portion of A */ 4898 for (j=jo; j<ncols_o; j++) { 4899 cj[k] = cmap[*bj++]; 4900 ca[k++] = *ba++; 4901 } 4902 } 4903 /* put together the new matrix */ 4904 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4905 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4906 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4907 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4908 mat->free_a = PETSC_TRUE; 4909 mat->free_ij = PETSC_TRUE; 4910 mat->nonew = 0; 4911 } else if (scall == MAT_REUSE_MATRIX) { 4912 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4913 ci = mat->i; cj = mat->j; cam = mat->a; 4914 for (i=0; i<am; i++) { 4915 /* off-diagonal portion of A */ 4916 ncols_o = bi[i+1] - bi[i]; 4917 for (jo=0; jo<ncols_o; jo++) { 4918 col = cmap[*bj]; 4919 if (col >= cstart) break; 4920 *cam++ = *ba++; bj++; 4921 } 4922 /* diagonal portion of A */ 4923 ncols_d = ai[i+1] - ai[i]; 4924 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4925 /* off-diagonal portion of A */ 4926 for (j=jo; j<ncols_o; j++) { 4927 *cam++ = *ba++; bj++; 4928 } 4929 } 4930 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4931 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4932 PetscFunctionReturn(0); 4933 } 4934 4935 /*@C 4936 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4937 4938 Not Collective 4939 4940 Input Parameters: 4941 + A - the matrix 4942 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4943 - row, col - index sets of rows and columns to extract (or NULL) 4944 4945 Output Parameter: 4946 . A_loc - the local sequential matrix generated 4947 4948 Level: developer 4949 4950 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4951 4952 @*/ 4953 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4954 { 4955 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4956 PetscErrorCode ierr; 4957 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4958 IS isrowa,iscola; 4959 Mat *aloc; 4960 PetscBool match; 4961 4962 PetscFunctionBegin; 4963 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4964 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4965 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4966 if (!row) { 4967 start = A->rmap->rstart; end = A->rmap->rend; 4968 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4969 } else { 4970 isrowa = *row; 4971 } 4972 if (!col) { 4973 start = A->cmap->rstart; 4974 cmap = a->garray; 4975 nzA = a->A->cmap->n; 4976 nzB = a->B->cmap->n; 4977 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4978 ncols = 0; 4979 for (i=0; i<nzB; i++) { 4980 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4981 else break; 4982 } 4983 imark = i; 4984 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4985 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4986 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4987 } else { 4988 iscola = *col; 4989 } 4990 if (scall != MAT_INITIAL_MATRIX) { 4991 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4992 aloc[0] = *A_loc; 4993 } 4994 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4995 *A_loc = aloc[0]; 4996 ierr = PetscFree(aloc);CHKERRQ(ierr); 4997 if (!row) { 4998 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4999 } 5000 if (!col) { 5001 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5002 } 5003 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5004 PetscFunctionReturn(0); 5005 } 5006 5007 /*@C 5008 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5009 5010 Collective on Mat 5011 5012 Input Parameters: 5013 + A,B - the matrices in mpiaij format 5014 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5015 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5016 5017 Output Parameter: 5018 + rowb, colb - index sets of rows and columns of B to extract 5019 - B_seq - the sequential matrix generated 5020 5021 Level: developer 5022 5023 @*/ 5024 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5025 { 5026 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5027 PetscErrorCode ierr; 5028 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5029 IS isrowb,iscolb; 5030 Mat *bseq=NULL; 5031 5032 PetscFunctionBegin; 5033 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5034 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5035 } 5036 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5037 5038 if (scall == MAT_INITIAL_MATRIX) { 5039 start = A->cmap->rstart; 5040 cmap = a->garray; 5041 nzA = a->A->cmap->n; 5042 nzB = a->B->cmap->n; 5043 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5044 ncols = 0; 5045 for (i=0; i<nzB; i++) { /* row < local row index */ 5046 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5047 else break; 5048 } 5049 imark = i; 5050 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5051 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5052 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5053 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5054 } else { 5055 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5056 isrowb = *rowb; iscolb = *colb; 5057 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5058 bseq[0] = *B_seq; 5059 } 5060 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5061 *B_seq = bseq[0]; 5062 ierr = PetscFree(bseq);CHKERRQ(ierr); 5063 if (!rowb) { 5064 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5065 } else { 5066 *rowb = isrowb; 5067 } 5068 if (!colb) { 5069 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5070 } else { 5071 *colb = iscolb; 5072 } 5073 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5074 PetscFunctionReturn(0); 5075 } 5076 5077 /* 5078 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5079 of the OFF-DIAGONAL portion of local A 5080 5081 Collective on Mat 5082 5083 Input Parameters: 5084 + A,B - the matrices in mpiaij format 5085 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5086 5087 Output Parameter: 5088 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5089 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5090 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5091 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5092 5093 Level: developer 5094 5095 */ 5096 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5097 { 5098 VecScatter_MPI_General *gen_to,*gen_from; 5099 PetscErrorCode ierr; 5100 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5101 Mat_SeqAIJ *b_oth; 5102 VecScatter ctx =a->Mvctx; 5103 MPI_Comm comm; 5104 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5105 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5106 PetscInt *rvalues,*svalues; 5107 MatScalar *b_otha,*bufa,*bufA; 5108 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5109 MPI_Request *rwaits = NULL,*swaits = NULL; 5110 MPI_Status *sstatus,rstatus; 5111 PetscMPIInt jj,size; 5112 PetscInt *cols,sbs,rbs; 5113 PetscScalar *vals; 5114 5115 PetscFunctionBegin; 5116 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5117 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5118 5119 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5120 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5121 } 5122 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5123 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5124 5125 if (size == 1) { 5126 startsj_s = NULL; 5127 bufa_ptr = NULL; 5128 *B_oth = NULL; 5129 PetscFunctionReturn(0); 5130 } 5131 5132 gen_to = (VecScatter_MPI_General*)ctx->todata; 5133 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5134 nrecvs = gen_from->n; 5135 nsends = gen_to->n; 5136 5137 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5138 srow = gen_to->indices; /* local row index to be sent */ 5139 sstarts = gen_to->starts; 5140 sprocs = gen_to->procs; 5141 sstatus = gen_to->sstatus; 5142 sbs = gen_to->bs; 5143 rstarts = gen_from->starts; 5144 rprocs = gen_from->procs; 5145 rbs = gen_from->bs; 5146 5147 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5148 if (scall == MAT_INITIAL_MATRIX) { 5149 /* i-array */ 5150 /*---------*/ 5151 /* post receives */ 5152 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5153 for (i=0; i<nrecvs; i++) { 5154 rowlen = rvalues + rstarts[i]*rbs; 5155 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5156 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5157 } 5158 5159 /* pack the outgoing message */ 5160 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5161 5162 sstartsj[0] = 0; 5163 rstartsj[0] = 0; 5164 len = 0; /* total length of j or a array to be sent */ 5165 k = 0; 5166 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5167 for (i=0; i<nsends; i++) { 5168 rowlen = svalues + sstarts[i]*sbs; 5169 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5170 for (j=0; j<nrows; j++) { 5171 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5172 for (l=0; l<sbs; l++) { 5173 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5174 5175 rowlen[j*sbs+l] = ncols; 5176 5177 len += ncols; 5178 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5179 } 5180 k++; 5181 } 5182 ierr = 
MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5183 5184 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5185 } 5186 /* recvs and sends of i-array are completed */ 5187 i = nrecvs; 5188 while (i--) { 5189 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5190 } 5191 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5192 ierr = PetscFree(svalues);CHKERRQ(ierr); 5193 5194 /* allocate buffers for sending j and a arrays */ 5195 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5196 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5197 5198 /* create i-array of B_oth */ 5199 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5200 5201 b_othi[0] = 0; 5202 len = 0; /* total length of j or a array to be received */ 5203 k = 0; 5204 for (i=0; i<nrecvs; i++) { 5205 rowlen = rvalues + rstarts[i]*rbs; 5206 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5207 for (j=0; j<nrows; j++) { 5208 b_othi[k+1] = b_othi[k] + rowlen[j]; 5209 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5210 k++; 5211 } 5212 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5213 } 5214 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5215 5216 /* allocate space for j and a arrrays of B_oth */ 5217 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5218 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5219 5220 /* j-array */ 5221 /*---------*/ 5222 /* post receives of j-array */ 5223 for (i=0; i<nrecvs; i++) { 5224 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5225 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5226 } 5227 5228 /* pack the outgoing message j-array */ 5229 k = 0; 5230 for (i=0; i<nsends; i++) { 5231 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5232 bufJ = bufj+sstartsj[i]; 5233 for (j=0; j<nrows; j++) { 5234 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5235 for (ll=0; ll<sbs; ll++) { 5236 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5237 for (l=0; l<ncols; l++) { 5238 *bufJ++ = cols[l]; 5239 } 5240 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5241 } 5242 } 5243 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5244 } 5245 5246 /* recvs and sends of j-array are completed */ 5247 i = nrecvs; 5248 while (i--) { 5249 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5250 } 5251 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5252 } else if (scall == MAT_REUSE_MATRIX) { 5253 sstartsj = *startsj_s; 5254 rstartsj = *startsj_r; 5255 bufa = *bufa_ptr; 5256 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5257 b_otha = b_oth->a; 5258 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5259 5260 /* a-array */ 5261 /*---------*/ 5262 /* post receives of a-array */ 5263 for (i=0; i<nrecvs; i++) { 5264 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5265 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5266 } 5267 5268 /* pack the outgoing message a-array */ 5269 k = 0; 5270 for (i=0; i<nsends; i++) { 5271 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5272 bufA = bufa+sstartsj[i]; 5273 for (j=0; j<nrows; j++) { 5274 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5275 for (ll=0; 
ll<sbs; ll++) { 5276 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5277 for (l=0; l<ncols; l++) { 5278 *bufA++ = vals[l]; 5279 } 5280 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5281 } 5282 } 5283 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5284 } 5285 /* recvs and sends of a-array are completed */ 5286 i = nrecvs; 5287 while (i--) { 5288 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5289 } 5290 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5291 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5292 5293 if (scall == MAT_INITIAL_MATRIX) { 5294 /* put together the new matrix */ 5295 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5296 5297 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5298 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5299 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5300 b_oth->free_a = PETSC_TRUE; 5301 b_oth->free_ij = PETSC_TRUE; 5302 b_oth->nonew = 0; 5303 5304 ierr = PetscFree(bufj);CHKERRQ(ierr); 5305 if (!startsj_s || !bufa_ptr) { 5306 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5307 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5308 } else { 5309 *startsj_s = sstartsj; 5310 *startsj_r = rstartsj; 5311 *bufa_ptr = bufa; 5312 } 5313 } 5314 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5315 PetscFunctionReturn(0); 5316 } 5317 5318 /*@C 5319 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5320 5321 Not Collective 5322 5323 Input Parameters: 5324 . A - The matrix in mpiaij format 5325 5326 Output Parameter: 5327 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5328 . 
colmap - A map from global column index to local index into lvec 5329 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5330 5331 Level: developer 5332 5333 @*/ 5334 #if defined(PETSC_USE_CTABLE) 5335 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5336 #else 5337 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5338 #endif 5339 { 5340 Mat_MPIAIJ *a; 5341 5342 PetscFunctionBegin; 5343 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5344 PetscValidPointer(lvec, 2); 5345 PetscValidPointer(colmap, 3); 5346 PetscValidPointer(multScatter, 4); 5347 a = (Mat_MPIAIJ*) A->data; 5348 if (lvec) *lvec = a->lvec; 5349 if (colmap) *colmap = a->colmap; 5350 if (multScatter) *multScatter = a->Mvctx; 5351 PetscFunctionReturn(0); 5352 } 5353 5354 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5355 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5356 #if defined(PETSC_HAVE_MKL_SPARSE) 5357 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5358 #endif 5359 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5360 #if defined(PETSC_HAVE_ELEMENTAL) 5361 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5362 #endif 5363 #if defined(PETSC_HAVE_HYPRE) 5364 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5365 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5366 #endif 5367 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*); 5368 5369 /* 5370 Computes (B'*A')' since computing B*A directly is untenable 5371 5372 n p p 5373 ( ) ( ) ( ) 5374 m ( A ) * n ( B ) = m ( C ) 5375 ( ) ( ) ( ) 5376 5377 */ 5378 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5379 { 5380 PetscErrorCode ierr; 5381 Mat At,Bt,Ct; 5382 5383 PetscFunctionBegin; 5384 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5385 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5386 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5387 ierr = MatDestroy(&At);CHKERRQ(ierr); 5388 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5389 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5390 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5391 PetscFunctionReturn(0); 5392 } 5393 5394 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5395 { 5396 PetscErrorCode ierr; 5397 PetscInt m=A->rmap->n,n=B->cmap->n; 5398 Mat Cmat; 5399 5400 PetscFunctionBegin; 5401 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5402 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5403 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5404 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5405 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5406 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5407 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5408 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5409 5410 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5411 5412 *C = Cmat; 5413 PetscFunctionReturn(0); 5414 } 5415 5416 /* ----------------------------------------------------------------*/ 5417 PETSC_INTERN 
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5418 { 5419 PetscErrorCode ierr; 5420 5421 PetscFunctionBegin; 5422 if (scall == MAT_INITIAL_MATRIX) { 5423 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5424 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5425 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5426 } 5427 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5428 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5429 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5430 PetscFunctionReturn(0); 5431 } 5432 5433 /*MC 5434 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5435 5436 Options Database Keys: 5437 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5438 5439 Level: beginner 5440 5441 .seealso: MatCreateAIJ() 5442 M*/ 5443 5444 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5445 { 5446 Mat_MPIAIJ *b; 5447 PetscErrorCode ierr; 5448 PetscMPIInt size; 5449 5450 PetscFunctionBegin; 5451 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5452 5453 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5454 B->data = (void*)b; 5455 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5456 B->assembled = PETSC_FALSE; 5457 B->insertmode = NOT_SET_VALUES; 5458 b->size = size; 5459 5460 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5461 5462 /* build cache for off array entries formed */ 5463 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5464 5465 b->donotstash = PETSC_FALSE; 5466 b->colmap = 0; 5467 b->garray = 0; 5468 b->roworiented = PETSC_TRUE; 5469 5470 /* stuff used for matrix vector multiply */ 5471 b->lvec = NULL; 5472 b->Mvctx = NULL; 5473 5474 /* stuff for MatGetRow() */ 5475 b->rowindices = 0; 5476 b->rowvalues = 0; 5477 b->getrowactive = PETSC_FALSE; 5478 5479 /* flexible pointer used in CUSP/CUSPARSE classes */ 5480 b->spptr = NULL; 5481 5482 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5483 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5484 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5485 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5486 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5487 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5488 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5489 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5490 #if defined(PETSC_HAVE_MKL_SPARSE) 5491 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5492 #endif 5493 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5494 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5495 #if defined(PETSC_HAVE_ELEMENTAL) 5496 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5497 #endif 5498 #if defined(PETSC_HAVE_HYPRE) 5499 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5500 #endif 5501 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr); 5502 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5503 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5504 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5505 #if defined(PETSC_HAVE_HYPRE) 5506 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5507 #endif 5508 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5509 PetscFunctionReturn(0); 5510 } 5511 5512 /*@C 5513 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5514 and "off-diagonal" part of the matrix in CSR format. 5515 5516 Collective on MPI_Comm 5517 5518 Input Parameters: 5519 + comm - MPI communicator 5520 . m - number of local rows (Cannot be PETSC_DECIDE) 5521 . n - This value should be the same as the local size used in creating the 5522 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5523 calculated if N is given) For square matrices n is almost always m. 5524 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5525 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5526 . i - row indices for "diagonal" portion of matrix 5527 . j - column indices 5528 . a - matrix values 5529 . oi - row indices for "off-diagonal" portion of matrix 5530 . oj - column indices 5531 - oa - matrix values 5532 5533 Output Parameter: 5534 . mat - the matrix 5535 5536 Level: advanced 5537 5538 Notes: 5539 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5540 must free the arrays once the matrix has been destroyed and not before. 5541 5542 The i and j indices are 0 based 5543 5544 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5545 5546 This sets local rows and cannot be used to set off-processor values. 5547 5548 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5549 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5550 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5551 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5552 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5553 communication if it is known that only local entries will be set. 
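
   If the split-CSR format is nonetheless required, a minimal calling sketch follows (the variable names and the
   surrounding setup of comm, m, n, M, and N are illustrative placeholders, not values provided by PETSc):
.vb
       PetscInt    *i, *j, *oi, *oj;   /* CSR row pointers (length m+1) and 0-based column indices, filled by the application */
       PetscScalar *a, *oa;            /* numerical values matching j[] and oj[] */
       Mat         A;

       ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,M,N,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
       /* use A ...; i, j, a, oi, oj, oa must remain valid until after MatDestroy(&A) */
.ve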
5554 5555 .keywords: matrix, aij, compressed row, sparse, parallel 5556 5557 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5558 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5559 @*/ 5560 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5561 { 5562 PetscErrorCode ierr; 5563 Mat_MPIAIJ *maij; 5564 5565 PetscFunctionBegin; 5566 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5567 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5568 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5569 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5570 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5571 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5572 maij = (Mat_MPIAIJ*) (*mat)->data; 5573 5574 (*mat)->preallocated = PETSC_TRUE; 5575 5576 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5577 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5578 5579 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5580 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5581 5582 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5583 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5584 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5585 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5586 5587 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5588 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5589 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5590 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5591 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5592 PetscFunctionReturn(0); 5593 } 5594 5595 /* 5596 Special version for direct calls from Fortran 5597 */ 5598 #include <petsc/private/fortranimpl.h> 5599 5600 /* Change these macros so can be used in void function */ 5601 #undef CHKERRQ 5602 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5603 #undef SETERRQ2 5604 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5605 #undef SETERRQ3 5606 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5607 #undef SETERRQ 5608 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5609 5610 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5611 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5612 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5613 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5614 #else 5615 #endif 5616 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5617 { 5618 Mat mat = *mmat; 5619 PetscInt m = *mm, n = *mn; 5620 InsertMode addv = *maddv; 5621 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5622 PetscScalar value; 5623 PetscErrorCode ierr; 5624 5625 MatCheckPreallocated(mat,1); 5626 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5627 5628 #if defined(PETSC_USE_DEBUG) 5629 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert 
values"); 5630 #endif 5631 { 5632 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5633 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5634 PetscBool roworiented = aij->roworiented; 5635 5636 /* Some Variables required in the macro */ 5637 Mat A = aij->A; 5638 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5639 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5640 MatScalar *aa = a->a; 5641 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5642 Mat B = aij->B; 5643 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5644 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5645 MatScalar *ba = b->a; 5646 5647 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5648 PetscInt nonew = a->nonew; 5649 MatScalar *ap1,*ap2; 5650 5651 PetscFunctionBegin; 5652 for (i=0; i<m; i++) { 5653 if (im[i] < 0) continue; 5654 #if defined(PETSC_USE_DEBUG) 5655 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5656 #endif 5657 if (im[i] >= rstart && im[i] < rend) { 5658 row = im[i] - rstart; 5659 lastcol1 = -1; 5660 rp1 = aj + ai[row]; 5661 ap1 = aa + ai[row]; 5662 rmax1 = aimax[row]; 5663 nrow1 = ailen[row]; 5664 low1 = 0; 5665 high1 = nrow1; 5666 lastcol2 = -1; 5667 rp2 = bj + bi[row]; 5668 ap2 = ba + bi[row]; 5669 rmax2 = bimax[row]; 5670 nrow2 = bilen[row]; 5671 low2 = 0; 5672 high2 = nrow2; 5673 5674 for (j=0; j<n; j++) { 5675 if (roworiented) value = v[i*n+j]; 5676 else value = v[i+j*m]; 5677 if (in[j] >= cstart && in[j] < cend) { 5678 col = in[j] - cstart; 5679 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5680 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5681 } else if (in[j] < 0) continue; 5682 #if defined(PETSC_USE_DEBUG) 5683 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5684 #endif 5685 else { 5686 if (mat->was_assembled) { 5687 if (!aij->colmap) { 5688 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5689 } 5690 #if defined(PETSC_USE_CTABLE) 5691 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5692 col--; 5693 #else 5694 col = aij->colmap[in[j]] - 1; 5695 #endif 5696 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5697 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5698 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5699 col = in[j]; 5700 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5701 B = aij->B; 5702 b = (Mat_SeqAIJ*)B->data; 5703 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5704 rp2 = bj + bi[row]; 5705 ap2 = ba + bi[row]; 5706 rmax2 = bimax[row]; 5707 nrow2 = bilen[row]; 5708 low2 = 0; 5709 high2 = nrow2; 5710 bm = aij->B->rmap->n; 5711 ba = b->a; 5712 } 5713 } else col = in[j]; 5714 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5715 } 5716 } 5717 } else if (!aij->donotstash) { 5718 if (roworiented) { 5719 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5720 } else { 5721 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5722 } 5723 } 5724 } 5725 } 5726 PetscFunctionReturnVoid(); 5727 } 
5728 5729