1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/isimpl.h> 6 #include <petscblaslapack.h> 7 #include <petscsf.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 22 enough exist. 23 24 Level: beginner 25 26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 27 M*/ 28 29 /*MC 30 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 31 32 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 33 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 34 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 35 for communicators controlling multiple processes. It is recommended that you call both of 36 the above preallocation routines for simplicity. 37 38 Options Database Keys: 39 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 40 41 Level: beginner 42 43 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 44 M*/ 45 46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 51 PetscFunctionBegin; 52 if (mat->A) { 53 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 54 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 55 } 56 PetscFunctionReturn(0); 57 } 58 59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 60 { 61 PetscErrorCode ierr; 62 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 63 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 64 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 65 const PetscInt *ia,*ib; 66 const MatScalar *aa,*bb; 67 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 68 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 69 70 PetscFunctionBegin; 71 *keptrows = 0; 72 ia = a->i; 73 ib = b->i; 74 for (i=0; i<m; i++) { 75 na = ia[i+1] - ia[i]; 76 nb = ib[i+1] - ib[i]; 77 if (!na && !nb) { 78 cnt++; 79 goto ok1; 80 } 81 aa = a->a + ia[i]; 82 for (j=0; j<na; j++) { 83 if (aa[j] != 0.0) goto ok1; 84 } 85 bb = b->a + ib[i]; 86 for (j=0; j <nb; j++) { 87 if (bb[j] != 0.0) goto ok1; 88 } 89 cnt++; 90 ok1:; 91 } 92 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 93 if (!n0rows) PetscFunctionReturn(0); 94 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 95 cnt = 0; 96 for (i=0; i<m; i++) { 97 na = ia[i+1] - ia[i]; 98 nb = ib[i+1] - ib[i]; 99 if (!na && !nb) continue; 100 aa = a->a + ia[i]; 101 for (j=0; j<na;j++) { 102 if (aa[j] != 0.0) { 103 rows[cnt++] = rstart + i; 104 goto ok2; 105 } 106 } 107 bb = b->a + ib[i]; 108 for (j=0; j<nb; j++) { 109 if (bb[j] != 0.0) { 110 rows[cnt++] = rstart + i; 111 goto ok2; 112 } 113 } 114 ok2:; 115 } 116 ierr = 
ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 117 PetscFunctionReturn(0); 118 } 119 120 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 121 { 122 PetscErrorCode ierr; 123 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 124 125 PetscFunctionBegin; 126 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 127 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 128 } else { 129 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 130 } 131 PetscFunctionReturn(0); 132 } 133 134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 135 { 136 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 137 PetscErrorCode ierr; 138 PetscInt i,rstart,nrows,*rows; 139 140 PetscFunctionBegin; 141 *zrows = NULL; 142 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 143 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 144 for (i=0; i<nrows; i++) rows[i] += rstart; 145 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 150 { 151 PetscErrorCode ierr; 152 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 153 PetscInt i,n,*garray = aij->garray; 154 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 155 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 156 PetscReal *work; 157 158 PetscFunctionBegin; 159 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 160 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 161 if (type == NORM_2) { 162 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 163 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 164 } 165 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 166 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 167 } 168 } else if (type == NORM_1) { 169 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 170 work[A->cmap->rstart + 
a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 171 } 172 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 173 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 174 } 175 } else if (type == NORM_INFINITY) { 176 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 177 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 178 } 179 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 180 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 181 } 182 183 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 184 if (type == NORM_INFINITY) { 185 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 186 } else { 187 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 188 } 189 ierr = PetscFree(work);CHKERRQ(ierr); 190 if (type == NORM_2) { 191 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 192 } 193 PetscFunctionReturn(0); 194 } 195 196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 197 { 198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 199 IS sis,gis; 200 PetscErrorCode ierr; 201 const PetscInt *isis,*igis; 202 PetscInt n,*iis,nsis,ngis,rstart,i; 203 204 PetscFunctionBegin; 205 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 206 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 207 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 208 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 209 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 210 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 211 212 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 213 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 214 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 215 n = ngis + nsis; 216 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 217 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 218 for 
(i=0; i<n; i++) iis[i] += rstart; 219 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 220 221 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 222 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 223 ierr = ISDestroy(&sis);CHKERRQ(ierr); 224 ierr = ISDestroy(&gis);CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 228 /* 229 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 230 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 231 232 Only for square matrices 233 234 Used by a preconditioner, hence PETSC_EXTERN 235 */ 236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 237 { 238 PetscMPIInt rank,size; 239 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 240 PetscErrorCode ierr; 241 Mat mat; 242 Mat_SeqAIJ *gmata; 243 PetscMPIInt tag; 244 MPI_Status status; 245 PetscBool aij; 246 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 247 248 PetscFunctionBegin; 249 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 250 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 251 if (!rank) { 252 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 253 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 254 } 255 if (reuse == MAT_INITIAL_MATRIX) { 256 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 257 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 258 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 259 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 260 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 261 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 262 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 263 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 264 ierr = 
MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 265 266 rowners[0] = 0; 267 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 268 rstart = rowners[rank]; 269 rend = rowners[rank+1]; 270 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 271 if (!rank) { 272 gmata = (Mat_SeqAIJ*) gmat->data; 273 /* send row lengths to all processors */ 274 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 275 for (i=1; i<size; i++) { 276 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 277 } 278 /* determine number diagonal and off-diagonal counts */ 279 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 280 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 281 jj = 0; 282 for (i=0; i<m; i++) { 283 for (j=0; j<dlens[i]; j++) { 284 if (gmata->j[jj] < rstart) ld[i]++; 285 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 286 jj++; 287 } 288 } 289 /* send column indices to other processes */ 290 for (i=1; i<size; i++) { 291 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 292 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 294 } 295 296 /* send numerical values to other processes */ 297 for (i=1; i<size; i++) { 298 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 299 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 300 } 301 gmataa = gmata->a; 302 gmataj = gmata->j; 303 304 } else { 305 /* receive row lengths */ 306 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 307 /* receive column indices */ 308 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 309 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 310 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 311 /* determine number diagonal and off-diagonal counts */ 312 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 313 
ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 314 jj = 0; 315 for (i=0; i<m; i++) { 316 for (j=0; j<dlens[i]; j++) { 317 if (gmataj[jj] < rstart) ld[i]++; 318 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 319 jj++; 320 } 321 } 322 /* receive numerical values */ 323 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 324 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 325 } 326 /* set preallocation */ 327 for (i=0; i<m; i++) { 328 dlens[i] -= olens[i]; 329 } 330 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 331 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 332 333 for (i=0; i<m; i++) { 334 dlens[i] += olens[i]; 335 } 336 cnt = 0; 337 for (i=0; i<m; i++) { 338 row = rstart + i; 339 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 340 cnt += dlens[i]; 341 } 342 if (rank) { 343 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 344 } 345 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 346 ierr = PetscFree(rowners);CHKERRQ(ierr); 347 348 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 349 350 *inmat = mat; 351 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 352 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 353 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 354 mat = *inmat; 355 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 356 if (!rank) { 357 /* send numerical values to other processes */ 358 gmata = (Mat_SeqAIJ*) gmat->data; 359 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 360 gmataa = gmata->a; 361 for (i=1; i<size; i++) { 362 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 363 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 364 } 365 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 366 } else { 367 /* receive numerical values from process 0*/ 368 nz = Ad->nz + 
Ao->nz; 369 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 370 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 371 } 372 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 373 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 374 ad = Ad->a; 375 ao = Ao->a; 376 if (mat->rmap->n) { 377 i = 0; 378 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 379 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 380 } 381 for (i=1; i<mat->rmap->n; i++) { 382 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 383 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 384 } 385 i--; 386 if (mat->rmap->n) { 387 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 388 } 389 if (rank) { 390 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 391 } 392 } 393 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 395 PetscFunctionReturn(0); 396 } 397 398 /* 399 Local utility routine that creates a mapping from the global column 400 number to the local number in the off-diagonal part of the local 401 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 402 a slightly higher hash table cost; without it it is not scalable (each processor 403 has an order N integer array but is fast to acess. 
404 */ 405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 406 { 407 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 408 PetscErrorCode ierr; 409 PetscInt n = aij->B->cmap->n,i; 410 411 PetscFunctionBegin; 412 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 413 #if defined(PETSC_USE_CTABLE) 414 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 415 for (i=0; i<n; i++) { 416 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 417 } 418 #else 419 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 420 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 421 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 422 #endif 423 PetscFunctionReturn(0); 424 } 425 426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 427 { \ 428 if (col <= lastcol1) low1 = 0; \ 429 else high1 = nrow1; \ 430 lastcol1 = col;\ 431 while (high1-low1 > 5) { \ 432 t = (low1+high1)/2; \ 433 if (rp1[t] > col) high1 = t; \ 434 else low1 = t; \ 435 } \ 436 for (_i=low1; _i<high1; _i++) { \ 437 if (rp1[_i] > col) break; \ 438 if (rp1[_i] == col) { \ 439 if (addv == ADD_VALUES) ap1[_i] += value; \ 440 else ap1[_i] = value; \ 441 goto a_noinsert; \ 442 } \ 443 } \ 444 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 445 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 446 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 447 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 448 N = nrow1++ - 1; a->nz++; high1++; \ 449 /* shift up all the later entries in this row */ \ 450 for (ii=N; ii>=_i; ii--) { \ 451 rp1[ii+1] = rp1[ii]; \ 452 ap1[ii+1] = ap1[ii]; \ 453 } \ 454 rp1[_i] = col; \ 455 ap1[_i] = value; \ 456 
A->nonzerostate++;\ 457 a_noinsert: ; \ 458 ailen[row] = nrow1; \ 459 } 460 461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 462 { \ 463 if (col <= lastcol2) low2 = 0; \ 464 else high2 = nrow2; \ 465 lastcol2 = col; \ 466 while (high2-low2 > 5) { \ 467 t = (low2+high2)/2; \ 468 if (rp2[t] > col) high2 = t; \ 469 else low2 = t; \ 470 } \ 471 for (_i=low2; _i<high2; _i++) { \ 472 if (rp2[_i] > col) break; \ 473 if (rp2[_i] == col) { \ 474 if (addv == ADD_VALUES) ap2[_i] += value; \ 475 else ap2[_i] = value; \ 476 goto b_noinsert; \ 477 } \ 478 } \ 479 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 480 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 481 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 482 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 483 N = nrow2++ - 1; b->nz++; high2++; \ 484 /* shift up all the later entries in this row */ \ 485 for (ii=N; ii>=_i; ii--) { \ 486 rp2[ii+1] = rp2[ii]; \ 487 ap2[ii+1] = ap2[ii]; \ 488 } \ 489 rp2[_i] = col; \ 490 ap2[_i] = value; \ 491 B->nonzerostate++; \ 492 b_noinsert: ; \ 493 bilen[row] = nrow2; \ 494 } 495 496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 497 { 498 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 499 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 500 PetscErrorCode ierr; 501 PetscInt l,*garray = mat->garray,diag; 502 503 PetscFunctionBegin; 504 /* code only works for square matrices A */ 505 506 /* find size of row to the left of the diagonal part */ 507 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 508 row = row - diag; 509 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 510 if (garray[b->j[b->i[row]+l]] > diag) break; 511 } 512 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 513 514 /* diagonal 
part */ 515 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* right of diagonal part */ 518 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 523 { 524 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 525 PetscScalar value; 526 PetscErrorCode ierr; 527 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 528 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 529 PetscBool roworiented = aij->roworiented; 530 531 /* Some Variables required in the macro */ 532 Mat A = aij->A; 533 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 534 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 535 MatScalar *aa = a->a; 536 PetscBool ignorezeroentries = a->ignorezeroentries; 537 Mat B = aij->B; 538 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 539 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 540 MatScalar *ba = b->a; 541 542 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 543 PetscInt nonew; 544 MatScalar *ap1,*ap2; 545 546 PetscFunctionBegin; 547 for (i=0; i<m; i++) { 548 if (im[i] < 0) continue; 549 #if defined(PETSC_USE_DEBUG) 550 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 551 #endif 552 if (im[i] >= rstart && im[i] < rend) { 553 row = im[i] - rstart; 554 lastcol1 = -1; 555 rp1 = aj + ai[row]; 556 ap1 = aa + ai[row]; 557 rmax1 = aimax[row]; 558 nrow1 = ailen[row]; 559 low1 = 0; 560 high1 = nrow1; 561 lastcol2 = -1; 562 rp2 = bj + bi[row]; 563 ap2 = ba + bi[row]; 564 rmax2 = bimax[row]; 565 nrow2 = bilen[row]; 566 low2 = 0; 567 high2 = nrow2; 568 569 for 
(j=0; j<n; j++) { 570 if (roworiented) value = v[i*n+j]; 571 else value = v[i+j*m]; 572 if (in[j] >= cstart && in[j] < cend) { 573 col = in[j] - cstart; 574 nonew = a->nonew; 575 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 576 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 577 } else if (in[j] < 0) continue; 578 #if defined(PETSC_USE_DEBUG) 579 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 580 #endif 581 else { 582 if (mat->was_assembled) { 583 if (!aij->colmap) { 584 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 585 } 586 #if defined(PETSC_USE_CTABLE) 587 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 588 col--; 589 #else 590 col = aij->colmap[in[j]] - 1; 591 #endif 592 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 593 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 594 col = in[j]; 595 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 596 B = aij->B; 597 b = (Mat_SeqAIJ*)B->data; 598 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 599 rp2 = bj + bi[row]; 600 ap2 = ba + bi[row]; 601 rmax2 = bimax[row]; 602 nrow2 = bilen[row]; 603 low2 = 0; 604 high2 = nrow2; 605 bm = aij->B->rmap->n; 606 ba = b->a; 607 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 608 } else col = in[j]; 609 nonew = b->nonew; 610 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 611 } 612 } 613 } else { 614 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 615 if (!aij->donotstash) { 616 mat->assembled = PETSC_FALSE; 617 if (roworiented) { 618 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 619 } else { 620 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 621 } 622 } 623 } 624 } 625 PetscFunctionReturn(0); 626 } 627 628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 633 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 634 635 PetscFunctionBegin; 636 for (i=0; i<m; i++) { 637 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 638 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 639 if (idxm[i] >= rstart && idxm[i] < rend) { 640 row = idxm[i] - rstart; 641 for (j=0; j<n; j++) { 642 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 643 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 644 if (idxn[j] >= cstart && idxn[j] < cend) { 645 col = idxn[j] - cstart; 646 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 647 } else { 648 if (!aij->colmap) { 649 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 650 } 651 #if defined(PETSC_USE_CTABLE) 652 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 653 col--; 654 #else 655 col = aij->colmap[idxn[j]] - 1; 656 #endif 657 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 658 else { 659 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 660 } 661 } 662 } 663 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only 
local values currently supported"); 664 } 665 PetscFunctionReturn(0); 666 } 667 668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 669 670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 671 { 672 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 673 PetscErrorCode ierr; 674 PetscInt nstash,reallocs; 675 676 PetscFunctionBegin; 677 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 678 679 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 680 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 681 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 682 PetscFunctionReturn(0); 683 } 684 685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 686 { 687 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 688 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 689 PetscErrorCode ierr; 690 PetscMPIInt n; 691 PetscInt i,j,rstart,ncols,flg; 692 PetscInt *row,*col; 693 PetscBool other_disassembled; 694 PetscScalar *val; 695 696 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 697 698 PetscFunctionBegin; 699 if (!aij->donotstash && !mat->nooffprocentries) { 700 while (1) { 701 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 702 if (!flg) break; 703 704 for (i=0; i<n; ) { 705 /* Now identify the consecutive vals belonging to the same row */ 706 for (j=i,rstart=row[j]; j<n; j++) { 707 if (row[j] != rstart) break; 708 } 709 if (j < n) ncols = j-i; 710 else ncols = n-i; 711 /* Now assemble all these values with a single function call */ 712 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 713 714 i = j; 715 } 716 } 717 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 718 } 719 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 720 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 721 722 /* determine 
if any processor has disassembled, if so we must 723 also disassemble ourselfs, in order that we may reassemble. */ 724 /* 725 if nonzero structure of submatrix B cannot change then we know that 726 no processor disassembled thus we can skip this stuff 727 */ 728 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 729 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 730 if (mat->was_assembled && !other_disassembled) { 731 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 732 } 733 } 734 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 735 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 736 } 737 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 738 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 739 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 740 741 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 742 743 aij->rowvalues = 0; 744 745 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 746 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 747 748 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 749 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 750 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 751 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 752 } 753 PetscFunctionReturn(0); 754 } 755 756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 757 { 758 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 759 PetscErrorCode ierr; 760 761 PetscFunctionBegin; 762 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 763 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 764 PetscFunctionReturn(0); 765 } 766 767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 768 { 769 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) 
A->data; 770 PetscInt *lrows; 771 PetscInt r, len; 772 PetscErrorCode ierr; 773 774 PetscFunctionBegin; 775 /* get locally owned rows */ 776 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 777 /* fix right hand side if needed */ 778 if (x && b) { 779 const PetscScalar *xx; 780 PetscScalar *bb; 781 782 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 783 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 784 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 785 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 786 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 787 } 788 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 789 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 790 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 791 PetscBool cong; 792 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 793 if (cong) A->congruentlayouts = 1; 794 else A->congruentlayouts = 0; 795 } 796 if ((diag != 0.0) && A->congruentlayouts) { 797 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 798 } else if (diag != 0.0) { 799 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 800 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 801 for (r = 0; r < len; ++r) { 802 const PetscInt row = lrows[r] + A->rmap->rstart; 803 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 804 } 805 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 806 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 807 } else { 808 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 809 } 810 ierr = PetscFree(lrows);CHKERRQ(ierr); 811 812 /* only change matrix nonzero state if pattern was allowed 
to be changed */ 813 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 814 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 815 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 816 } 817 PetscFunctionReturn(0); 818 } 819 820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 821 { 822 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 823 PetscErrorCode ierr; 824 PetscMPIInt n = A->rmap->n; 825 PetscInt i,j,r,m,p = 0,len = 0; 826 PetscInt *lrows,*owners = A->rmap->range; 827 PetscSFNode *rrows; 828 PetscSF sf; 829 const PetscScalar *xx; 830 PetscScalar *bb,*mask; 831 Vec xmask,lmask; 832 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 833 const PetscInt *aj, *ii,*ridx; 834 PetscScalar *aa; 835 836 PetscFunctionBegin; 837 /* Create SF where leaves are input rows and roots are owned rows */ 838 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 839 for (r = 0; r < n; ++r) lrows[r] = -1; 840 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 841 for (r = 0; r < N; ++r) { 842 const PetscInt idx = rows[r]; 843 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 844 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 845 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 846 } 847 rrows[r].rank = p; 848 rrows[r].index = rows[r] - owners[p]; 849 } 850 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 851 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 852 /* Collect flags for rows to be zeroed */ 853 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 854 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 855 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 856 /* 
Compress and put in row numbers */ 857 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 858 /* zero diagonal part of matrix */ 859 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 860 /* handle off diagonal part of matrix */ 861 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 862 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 863 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 864 for (i=0; i<len; i++) bb[lrows[i]] = 1; 865 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 866 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 867 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 869 if (x) { 870 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 873 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 874 } 875 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 876 /* remove zeroed rows of off diagonal matrix */ 877 ii = aij->i; 878 for (i=0; i<len; i++) { 879 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 880 } 881 /* loop over all elements of off process part of matrix zeroing removed columns*/ 882 if (aij->compressedrow.use) { 883 m = aij->compressedrow.nrows; 884 ii = aij->compressedrow.i; 885 ridx = aij->compressedrow.rindex; 886 for (i=0; i<m; i++) { 887 n = ii[i+1] - ii[i]; 888 aj = aij->j + ii[i]; 889 aa = aij->a + ii[i]; 890 891 for (j=0; j<n; j++) { 892 if (PetscAbsScalar(mask[*aj])) { 893 if (b) bb[*ridx] -= *aa*xx[*aj]; 894 *aa = 0.0; 895 } 896 aa++; 897 aj++; 898 } 899 ridx++; 900 } 901 } else { /* do not use compressed row format */ 902 m = l->B->rmap->n; 903 for (i=0; i<m; i++) { 904 n = ii[i+1] - ii[i]; 905 aj = aij->j + ii[i]; 906 aa = aij->a + ii[i]; 907 for (j=0; j<n; j++) { 
908 if (PetscAbsScalar(mask[*aj])) { 909 if (b) bb[i] -= *aa*xx[*aj]; 910 *aa = 0.0; 911 } 912 aa++; 913 aj++; 914 } 915 } 916 } 917 if (x) { 918 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 919 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 920 } 921 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 922 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 923 ierr = PetscFree(lrows);CHKERRQ(ierr); 924 925 /* only change matrix nonzero state if pattern was allowed to be changed */ 926 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 927 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 928 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 929 } 930 PetscFunctionReturn(0); 931 } 932 933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 934 { 935 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 936 PetscErrorCode ierr; 937 PetscInt nt; 938 939 PetscFunctionBegin; 940 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 941 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 942 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 943 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 944 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 945 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 946 PetscFunctionReturn(0); 947 } 948 949 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 950 { 951 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 952 PetscErrorCode ierr; 953 954 PetscFunctionBegin; 955 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 956 PetscFunctionReturn(0); 957 } 958 959 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 960 { 961 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 962 PetscErrorCode ierr; 963 964 PetscFunctionBegin; 965 ierr = 
VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 966 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 967 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 968 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 972 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 973 { 974 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 975 PetscErrorCode ierr; 976 PetscBool merged; 977 978 PetscFunctionBegin; 979 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 980 /* do nondiagonal part */ 981 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 982 if (!merged) { 983 /* send it on its way */ 984 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 985 /* do local part */ 986 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 987 /* receive remote parts: note this assumes the values are not actually */ 988 /* added in yy until the next line, */ 989 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 990 } else { 991 /* do local part */ 992 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 993 /* send it on its way */ 994 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 995 /* values actually were received in the Begin() but we need to call this nop */ 996 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 997 } 998 PetscFunctionReturn(0); 999 } 1000 1001 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1002 { 1003 MPI_Comm comm; 1004 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1005 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1006 IS Me,Notme; 1007 PetscErrorCode ierr; 1008 PetscInt M,N,first,last,*notme,i; 1009 PetscMPIInt size; 1010 1011 PetscFunctionBegin; 1012 /* Easy test: symmetric diagonal block */ 1013 Bij = 
(Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1014 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1015 if (!*f) PetscFunctionReturn(0); 1016 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1017 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1018 if (size == 1) PetscFunctionReturn(0); 1019 1020 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1021 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1022 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1023 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1024 for (i=0; i<first; i++) notme[i] = i; 1025 for (i=last; i<M; i++) notme[i-last+first] = i; 1026 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1027 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1028 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1029 Aoff = Aoffs[0]; 1030 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1031 Boff = Boffs[0]; 1032 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1033 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1034 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1035 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1036 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1037 ierr = PetscFree(notme);CHKERRQ(ierr); 1038 PetscFunctionReturn(0); 1039 } 1040 1041 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1042 { 1043 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1044 PetscErrorCode ierr; 1045 1046 PetscFunctionBegin; 1047 /* do nondiagonal part */ 1048 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1049 /* send it on its way */ 1050 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1051 /* do local part */ 1052 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1053 /* receive remote parts */ 1054 ierr = 
VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1055 PetscFunctionReturn(0); 1056 } 1057 1058 /* 1059 This only works correctly for square matrices where the subblock A->A is the 1060 diagonal block 1061 */ 1062 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1063 { 1064 PetscErrorCode ierr; 1065 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1066 1067 PetscFunctionBegin; 1068 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1069 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1070 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1071 PetscFunctionReturn(0); 1072 } 1073 1074 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1075 { 1076 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1077 PetscErrorCode ierr; 1078 1079 PetscFunctionBegin; 1080 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1081 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1082 PetscFunctionReturn(0); 1083 } 1084 1085 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1086 { 1087 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1088 PetscErrorCode ierr; 1089 1090 PetscFunctionBegin; 1091 #if defined(PETSC_USE_LOG) 1092 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1093 #endif 1094 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1095 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1096 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1097 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1098 #if defined(PETSC_USE_CTABLE) 1099 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1100 #else 1101 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1102 #endif 1103 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1104 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1105 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1106 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1107 ierr = 
PetscFree(aij->ld);CHKERRQ(ierr); 1108 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1109 1110 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1111 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1112 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1113 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1114 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1115 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1116 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1117 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1118 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1119 #if defined(PETSC_HAVE_ELEMENTAL) 1120 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1121 #endif 1122 #if defined(PETSC_HAVE_HYPRE) 1123 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1125 #endif 1126 PetscFunctionReturn(0); 1127 } 1128 1129 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1130 { 1131 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1132 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1133 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1134 PetscErrorCode ierr; 1135 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1136 int fd; 1137 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1138 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1139 PetscScalar *column_values; 1140 PetscInt 
message_count,flowcontrolcount; 1141 FILE *file; 1142 1143 PetscFunctionBegin; 1144 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1145 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1146 nz = A->nz + B->nz; 1147 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1148 if (!rank) { 1149 header[0] = MAT_FILE_CLASSID; 1150 header[1] = mat->rmap->N; 1151 header[2] = mat->cmap->N; 1152 1153 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1154 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1155 /* get largest number of rows any processor has */ 1156 rlen = mat->rmap->n; 1157 range = mat->rmap->range; 1158 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1159 } else { 1160 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1161 rlen = mat->rmap->n; 1162 } 1163 1164 /* load up the local row counts */ 1165 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1166 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1167 1168 /* store the row lengths to the file */ 1169 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1170 if (!rank) { 1171 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1172 for (i=1; i<size; i++) { 1173 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1174 rlen = range[i+1] - range[i]; 1175 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1176 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1177 } 1178 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1179 } else { 1180 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1181 ierr 
= MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1182 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1183 } 1184 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1185 1186 /* load up the local column indices */ 1187 nzmax = nz; /* th processor needs space a largest processor needs */ 1188 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1189 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1190 cnt = 0; 1191 for (i=0; i<mat->rmap->n; i++) { 1192 for (j=B->i[i]; j<B->i[i+1]; j++) { 1193 if ((col = garray[B->j[j]]) > cstart) break; 1194 column_indices[cnt++] = col; 1195 } 1196 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1197 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1198 } 1199 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1200 1201 /* store the column indices to the file */ 1202 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1203 if (!rank) { 1204 MPI_Status status; 1205 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1206 for (i=1; i<size; i++) { 1207 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1208 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1209 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1210 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1211 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1212 } 1213 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1214 } else { 1215 ierr = 
PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1216 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1217 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1218 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1219 } 1220 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1221 1222 /* load up the local column values */ 1223 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1224 cnt = 0; 1225 for (i=0; i<mat->rmap->n; i++) { 1226 for (j=B->i[i]; j<B->i[i+1]; j++) { 1227 if (garray[B->j[j]] > cstart) break; 1228 column_values[cnt++] = B->a[j]; 1229 } 1230 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1231 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1232 } 1233 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1234 1235 /* store the column values to the file */ 1236 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1237 if (!rank) { 1238 MPI_Status status; 1239 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1240 for (i=1; i<size; i++) { 1241 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1242 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1243 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1244 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1245 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1246 } 1247 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1248 } else { 1249 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 
1250 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1251 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1252 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1253 } 1254 ierr = PetscFree(column_values);CHKERRQ(ierr); 1255 1256 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1257 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #include <petscdraw.h> 1262 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1263 { 1264 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1265 PetscErrorCode ierr; 1266 PetscMPIInt rank = aij->rank,size = aij->size; 1267 PetscBool isdraw,iascii,isbinary; 1268 PetscViewer sviewer; 1269 PetscViewerFormat format; 1270 1271 PetscFunctionBegin; 1272 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1273 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1274 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1275 if (iascii) { 1276 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1277 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1278 MatInfo info; 1279 PetscBool inodes; 1280 1281 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1282 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1283 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1284 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1285 if (!inodes) { 1286 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1287 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1288 } else { 1289 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1290 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1291 } 1292 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1293 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1294 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1295 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1296 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1297 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1298 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1299 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1300 PetscFunctionReturn(0); 1301 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1302 PetscInt inodecount,inodelimit,*inodes; 1303 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1304 if (inodes) { 1305 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1306 } else { 1307 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1308 } 1309 PetscFunctionReturn(0); 1310 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1311 PetscFunctionReturn(0); 1312 } 1313 } else if (isbinary) { 1314 if (size == 1) { 1315 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1316 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1317 } else { 1318 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1319 } 1320 PetscFunctionReturn(0); 1321 } else if (isdraw) { 1322 PetscDraw draw; 1323 PetscBool isnull; 1324 ierr = 
PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1325 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1326 if (isnull) PetscFunctionReturn(0); 1327 } 1328 1329 { 1330 /* assemble the entire matrix onto first processor. */ 1331 Mat A; 1332 Mat_SeqAIJ *Aloc; 1333 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1334 MatScalar *a; 1335 1336 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1337 if (!rank) { 1338 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1339 } else { 1340 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1341 } 1342 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1343 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1344 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1345 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1346 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1347 1348 /* copy over the A part */ 1349 Aloc = (Mat_SeqAIJ*)aij->A->data; 1350 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1351 row = mat->rmap->rstart; 1352 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1353 for (i=0; i<m; i++) { 1354 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1355 row++; 1356 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1357 } 1358 aj = Aloc->j; 1359 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1360 1361 /* copy over the B part */ 1362 Aloc = (Mat_SeqAIJ*)aij->B->data; 1363 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1364 row = mat->rmap->rstart; 1365 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1366 ct = cols; 1367 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1368 for (i=0; i<m; i++) { 1369 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1370 row++; 1371 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1372 } 1373 ierr = PetscFree(ct);CHKERRQ(ierr); 1374 ierr = 
MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1375 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1376 /* 1377 Everyone has to call to draw the matrix since the graphics waits are 1378 synchronized across all processors that share the PetscDraw object 1379 */ 1380 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1381 if (!rank) { 1382 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1383 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1384 } 1385 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1386 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1387 ierr = MatDestroy(&A);CHKERRQ(ierr); 1388 } 1389 PetscFunctionReturn(0); 1390 } 1391 1392 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1393 { 1394 PetscErrorCode ierr; 1395 PetscBool iascii,isdraw,issocket,isbinary; 1396 1397 PetscFunctionBegin; 1398 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1399 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1400 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1401 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1402 if (iascii || isdraw || isbinary || issocket) { 1403 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1404 } 1405 PetscFunctionReturn(0); 1406 } 1407 1408 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1409 { 1410 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1411 PetscErrorCode ierr; 1412 Vec bb1 = 0; 1413 PetscBool hasop; 1414 1415 PetscFunctionBegin; 1416 if (flag == SOR_APPLY_UPPER) { 1417 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1418 PetscFunctionReturn(0); 1419 } 1420 
1421 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1422 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1423 } 1424 1425 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1426 if (flag & SOR_ZERO_INITIAL_GUESS) { 1427 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1428 its--; 1429 } 1430 1431 while (its--) { 1432 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1433 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1434 1435 /* update rhs: bb1 = bb - B*x */ 1436 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1437 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1438 1439 /* local sweep */ 1440 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1441 } 1442 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1443 if (flag & SOR_ZERO_INITIAL_GUESS) { 1444 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1445 its--; 1446 } 1447 while (its--) { 1448 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1449 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1450 1451 /* update rhs: bb1 = bb - B*x */ 1452 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1453 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1454 1455 /* local sweep */ 1456 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1457 } 1458 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1459 if (flag & SOR_ZERO_INITIAL_GUESS) { 1460 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1461 its--; 1462 } 1463 while (its--) { 1464 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1465 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1466 1467 /* update rhs: bb1 = bb - B*x */ 1468 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1469 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1470 1471 /* local sweep */ 1472 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1473 } 1474 } else if (flag & SOR_EISENSTAT) { 1475 Vec xx1; 1476 1477 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1478 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1479 1480 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1481 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1482 if (!mat->diag) { 1483 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1484 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1485 } 1486 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1487 if (hasop) { 1488 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1489 } else { 1490 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1491 } 1492 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1493 1494 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1495 1496 /* local sweep */ 1497 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1498 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1499 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1500 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1501 1502 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1503 1504 matin->factorerrortype = mat->A->factorerrortype; 1505 PetscFunctionReturn(0); 1506 } 1507 1508 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1509 { 1510 Mat aA,aB,Aperm; 1511 const 
PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1512 PetscScalar *aa,*ba; 1513 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1514 PetscSF rowsf,sf; 1515 IS parcolp = NULL; 1516 PetscBool done; 1517 PetscErrorCode ierr; 1518 1519 PetscFunctionBegin; 1520 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1521 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1522 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1523 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1524 1525 /* Invert row permutation to find out where my rows should go */ 1526 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1527 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1528 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1529 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1530 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1531 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1532 1533 /* Invert column permutation to find out where my columns should go */ 1534 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1535 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1536 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1537 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1538 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1539 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1540 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1541 1542 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1543 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1544 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1545 1546 /* Find out where my gcols should go */ 1547 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1548 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1549 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1550 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1551 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1552 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1553 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1554 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1555 1556 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1557 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1558 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1559 for (i=0; i<m; i++) { 1560 PetscInt row = rdest[i],rowner; 1561 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1562 for (j=ai[i]; j<ai[i+1]; j++) { 1563 PetscInt cowner,col = cdest[aj[j]]; 1564 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1565 if (rowner == cowner) dnnz[i]++; 1566 else onnz[i]++; 1567 } 1568 for (j=bi[i]; j<bi[i+1]; j++) { 1569 PetscInt cowner,col = gcdest[bj[j]]; 1570 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1571 if (rowner == cowner) dnnz[i]++; 1572 else onnz[i]++; 1573 } 1574 } 1575 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1576 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1577 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1578 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1579 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1580 1581 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1582 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1583 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1584 for (i=0; i<m; i++) { 1585 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1586 PetscInt 
j0,rowlen; 1587 rowlen = ai[i+1] - ai[i]; 1588 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1589 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1590 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1591 } 1592 rowlen = bi[i+1] - bi[i]; 1593 for (j0=j=0; j<rowlen; j0=j) { 1594 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1595 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1596 } 1597 } 1598 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1599 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1600 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1601 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1602 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1603 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1604 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1605 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1606 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1607 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1608 *B = Aperm; 1609 PetscFunctionReturn(0); 1610 } 1611 1612 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1613 { 1614 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1615 PetscErrorCode ierr; 1616 1617 PetscFunctionBegin; 1618 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1619 if (ghosts) *ghosts = aij->garray; 1620 PetscFunctionReturn(0); 1621 } 1622 1623 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1624 { 1625 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1626 Mat A = mat->A,B = mat->B; 1627 PetscErrorCode ierr; 1628 PetscReal isend[5],irecv[5]; 1629 1630 PetscFunctionBegin; 1631 info->block_size = 1.0; 1632 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1633 1634 
isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1635 isend[3] = info->memory; isend[4] = info->mallocs; 1636 1637 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1638 1639 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1640 isend[3] += info->memory; isend[4] += info->mallocs; 1641 if (flag == MAT_LOCAL) { 1642 info->nz_used = isend[0]; 1643 info->nz_allocated = isend[1]; 1644 info->nz_unneeded = isend[2]; 1645 info->memory = isend[3]; 1646 info->mallocs = isend[4]; 1647 } else if (flag == MAT_GLOBAL_MAX) { 1648 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1649 1650 info->nz_used = irecv[0]; 1651 info->nz_allocated = irecv[1]; 1652 info->nz_unneeded = irecv[2]; 1653 info->memory = irecv[3]; 1654 info->mallocs = irecv[4]; 1655 } else if (flag == MAT_GLOBAL_SUM) { 1656 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1657 1658 info->nz_used = irecv[0]; 1659 info->nz_allocated = irecv[1]; 1660 info->nz_unneeded = irecv[2]; 1661 info->memory = irecv[3]; 1662 info->mallocs = irecv[4]; 1663 } 1664 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1665 info->fill_ratio_needed = 0; 1666 info->factor_mallocs = 0; 1667 PetscFunctionReturn(0); 1668 } 1669 1670 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1671 { 1672 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1673 PetscErrorCode ierr; 1674 1675 PetscFunctionBegin; 1676 switch (op) { 1677 case MAT_NEW_NONZERO_LOCATIONS: 1678 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1679 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1680 case MAT_KEEP_NONZERO_PATTERN: 1681 case MAT_NEW_NONZERO_LOCATION_ERR: 1682 case MAT_USE_INODES: 1683 case MAT_IGNORE_ZERO_ENTRIES: 1684 MatCheckPreallocated(A,1); 1685 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1686 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1687 break; 1688 case 
MAT_ROW_ORIENTED: 1689 MatCheckPreallocated(A,1); 1690 a->roworiented = flg; 1691 1692 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1693 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1694 break; 1695 case MAT_NEW_DIAGONALS: 1696 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1697 break; 1698 case MAT_IGNORE_OFF_PROC_ENTRIES: 1699 a->donotstash = flg; 1700 break; 1701 case MAT_SPD: 1702 A->spd_set = PETSC_TRUE; 1703 A->spd = flg; 1704 if (flg) { 1705 A->symmetric = PETSC_TRUE; 1706 A->structurally_symmetric = PETSC_TRUE; 1707 A->symmetric_set = PETSC_TRUE; 1708 A->structurally_symmetric_set = PETSC_TRUE; 1709 } 1710 break; 1711 case MAT_SYMMETRIC: 1712 MatCheckPreallocated(A,1); 1713 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1714 break; 1715 case MAT_STRUCTURALLY_SYMMETRIC: 1716 MatCheckPreallocated(A,1); 1717 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1718 break; 1719 case MAT_HERMITIAN: 1720 MatCheckPreallocated(A,1); 1721 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1722 break; 1723 case MAT_SYMMETRY_ETERNAL: 1724 MatCheckPreallocated(A,1); 1725 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1726 break; 1727 case MAT_SUBMAT_SINGLEIS: 1728 A->submat_singleis = flg; 1729 break; 1730 case MAT_STRUCTURE_ONLY: 1731 /* The option is handled directly by MatSetOption() */ 1732 break; 1733 default: 1734 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1735 } 1736 PetscFunctionReturn(0); 1737 } 1738 1739 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1740 { 1741 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1742 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1743 PetscErrorCode ierr; 1744 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1745 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1746 PetscInt *cmap,*idx_p; 1747 1748 PetscFunctionBegin; 1749 if (mat->getrowactive) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1750 mat->getrowactive = PETSC_TRUE; 1751 1752 if (!mat->rowvalues && (idx || v)) { 1753 /* 1754 allocate enough space to hold information from the longest row. 1755 */ 1756 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1757 PetscInt max = 1,tmp; 1758 for (i=0; i<matin->rmap->n; i++) { 1759 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1760 if (max < tmp) max = tmp; 1761 } 1762 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1763 } 1764 1765 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1766 lrow = row - rstart; 1767 1768 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1769 if (!v) {pvA = 0; pvB = 0;} 1770 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1771 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1772 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1773 nztot = nzA + nzB; 1774 1775 cmap = mat->garray; 1776 if (v || idx) { 1777 if (nztot) { 1778 /* Sort by increasing column numbers, assuming A and B already sorted */ 1779 PetscInt imark = -1; 1780 if (v) { 1781 *v = v_p = mat->rowvalues; 1782 for (i=0; i<nzB; i++) { 1783 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1784 else break; 1785 } 1786 imark = i; 1787 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1788 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1789 } 1790 if (idx) { 1791 *idx = idx_p = mat->rowindices; 1792 if (imark > -1) { 1793 for (i=0; i<imark; i++) { 1794 idx_p[i] = cmap[cworkB[i]]; 1795 } 1796 } else { 1797 for (i=0; i<nzB; i++) { 1798 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1799 else break; 1800 } 1801 imark = i; 1802 } 1803 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1804 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1805 } 1806 } else { 1807 if (idx) *idx = 0; 1808 if (v) *v = 0; 1809 } 1810 } 1811 
*nz = nztot; 1812 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1813 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1814 PetscFunctionReturn(0); 1815 } 1816 1817 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1818 { 1819 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1820 1821 PetscFunctionBegin; 1822 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1823 aij->getrowactive = PETSC_FALSE; 1824 PetscFunctionReturn(0); 1825 } 1826 1827 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1828 { 1829 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1830 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1831 PetscErrorCode ierr; 1832 PetscInt i,j,cstart = mat->cmap->rstart; 1833 PetscReal sum = 0.0; 1834 MatScalar *v; 1835 1836 PetscFunctionBegin; 1837 if (aij->size == 1) { 1838 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1839 } else { 1840 if (type == NORM_FROBENIUS) { 1841 v = amat->a; 1842 for (i=0; i<amat->nz; i++) { 1843 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1844 } 1845 v = bmat->a; 1846 for (i=0; i<bmat->nz; i++) { 1847 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1848 } 1849 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1850 *norm = PetscSqrtReal(*norm); 1851 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1852 } else if (type == NORM_1) { /* max column norm */ 1853 PetscReal *tmp,*tmp2; 1854 PetscInt *jj,*garray = aij->garray; 1855 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1856 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1857 *norm = 0.0; 1858 v = amat->a; jj = amat->j; 1859 for (j=0; j<amat->nz; j++) { 1860 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1861 } 1862 v = bmat->a; jj = bmat->j; 1863 for (j=0; j<bmat->nz; j++) { 1864 tmp[garray[*jj++]] 
+= PetscAbsScalar(*v); v++; 1865 } 1866 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1867 for (j=0; j<mat->cmap->N; j++) { 1868 if (tmp2[j] > *norm) *norm = tmp2[j]; 1869 } 1870 ierr = PetscFree(tmp);CHKERRQ(ierr); 1871 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1872 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1873 } else if (type == NORM_INFINITY) { /* max row norm */ 1874 PetscReal ntemp = 0.0; 1875 for (j=0; j<aij->A->rmap->n; j++) { 1876 v = amat->a + amat->i[j]; 1877 sum = 0.0; 1878 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1879 sum += PetscAbsScalar(*v); v++; 1880 } 1881 v = bmat->a + bmat->i[j]; 1882 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1883 sum += PetscAbsScalar(*v); v++; 1884 } 1885 if (sum > ntemp) ntemp = sum; 1886 } 1887 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1888 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1889 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1890 } 1891 PetscFunctionReturn(0); 1892 } 1893 1894 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1895 { 1896 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1897 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1898 PetscErrorCode ierr; 1899 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1900 PetscInt cstart = A->cmap->rstart,ncol; 1901 Mat B; 1902 MatScalar *array; 1903 1904 PetscFunctionBegin; 1905 if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1906 1907 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1908 ai = Aloc->i; aj = Aloc->j; 1909 bi = Bloc->i; bj = Bloc->j; 1910 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1911 PetscInt *d_nnz,*g_nnz,*o_nnz; 1912 PetscSFNode 
*oloc; 1913 PETSC_UNUSED PetscSF sf; 1914 1915 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1916 /* compute d_nnz for preallocation */ 1917 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1918 for (i=0; i<ai[ma]; i++) { 1919 d_nnz[aj[i]]++; 1920 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1921 } 1922 /* compute local off-diagonal contributions */ 1923 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1924 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1925 /* map those to global */ 1926 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1927 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1928 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1929 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1930 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1931 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1932 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1933 1934 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1935 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1936 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1937 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1938 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1939 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1940 } else { 1941 B = *matout; 1942 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1943 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1944 } 1945 1946 /* copy over the A part */ 1947 array = Aloc->a; 1948 row = A->rmap->rstart; 1949 for (i=0; i<ma; i++) { 1950 ncol = ai[i+1]-ai[i]; 1951 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1952 row++; 1953 array += ncol; aj += ncol; 1954 } 
1955 aj = Aloc->j; 1956 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1957 1958 /* copy over the B part */ 1959 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1960 array = Bloc->a; 1961 row = A->rmap->rstart; 1962 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1963 cols_tmp = cols; 1964 for (i=0; i<mb; i++) { 1965 ncol = bi[i+1]-bi[i]; 1966 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1967 row++; 1968 array += ncol; cols_tmp += ncol; 1969 } 1970 ierr = PetscFree(cols);CHKERRQ(ierr); 1971 1972 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1973 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1974 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1975 *matout = B; 1976 } else { 1977 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1978 } 1979 PetscFunctionReturn(0); 1980 } 1981 1982 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1983 { 1984 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1985 Mat a = aij->A,b = aij->B; 1986 PetscErrorCode ierr; 1987 PetscInt s1,s2,s3; 1988 1989 PetscFunctionBegin; 1990 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1991 if (rr) { 1992 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1993 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1994 /* Overlap communication with computation. 
*/ 1995 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1996 } 1997 if (ll) { 1998 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1999 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2000 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2001 } 2002 /* scale the diagonal block */ 2003 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2004 2005 if (rr) { 2006 /* Do a scatter end and then right scale the off-diagonal block */ 2007 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2008 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2009 } 2010 PetscFunctionReturn(0); 2011 } 2012 2013 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2014 { 2015 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2016 PetscErrorCode ierr; 2017 2018 PetscFunctionBegin; 2019 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2020 PetscFunctionReturn(0); 2021 } 2022 2023 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2024 { 2025 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2026 Mat a,b,c,d; 2027 PetscBool flg; 2028 PetscErrorCode ierr; 2029 2030 PetscFunctionBegin; 2031 a = matA->A; b = matA->B; 2032 c = matB->A; d = matB->B; 2033 2034 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2035 if (flg) { 2036 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2037 } 2038 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2039 PetscFunctionReturn(0); 2040 } 2041 2042 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2043 { 2044 PetscErrorCode ierr; 2045 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2046 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2047 2048 PetscFunctionBegin; 2049 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2050 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2051 /* because of the column compression in the off-processor part of the matrix a->B, 2052 the number of columns in a->B and b->B may be different, hence we cannot call 2053 the MatCopy() directly on the two parts. If need be, we can provide a more 2054 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2055 then copying the submatrices */ 2056 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2057 } else { 2058 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2059 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2060 } 2061 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2062 PetscFunctionReturn(0); 2063 } 2064 2065 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2066 { 2067 PetscErrorCode ierr; 2068 2069 PetscFunctionBegin; 2070 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2071 PetscFunctionReturn(0); 2072 } 2073 2074 /* 2075 Computes the number of nonzeros per row needed for preallocation when X and Y 2076 have different nonzero structure. 
2077 */ 2078 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2079 { 2080 PetscInt i,j,k,nzx,nzy; 2081 2082 PetscFunctionBegin; 2083 /* Set the number of nonzeros in the new matrix */ 2084 for (i=0; i<m; i++) { 2085 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2086 nzx = xi[i+1] - xi[i]; 2087 nzy = yi[i+1] - yi[i]; 2088 nnz[i] = 0; 2089 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2090 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2091 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2092 nnz[i]++; 2093 } 2094 for (; k<nzy; k++) nnz[i]++; 2095 } 2096 PetscFunctionReturn(0); 2097 } 2098 2099 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2100 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2101 { 2102 PetscErrorCode ierr; 2103 PetscInt m = Y->rmap->N; 2104 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2105 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2106 2107 PetscFunctionBegin; 2108 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2109 PetscFunctionReturn(0); 2110 } 2111 2112 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2113 { 2114 PetscErrorCode ierr; 2115 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2116 PetscBLASInt bnz,one=1; 2117 Mat_SeqAIJ *x,*y; 2118 2119 PetscFunctionBegin; 2120 if (str == SAME_NONZERO_PATTERN) { 2121 PetscScalar alpha = a; 2122 x = (Mat_SeqAIJ*)xx->A->data; 2123 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2124 y = (Mat_SeqAIJ*)yy->A->data; 2125 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2126 x = (Mat_SeqAIJ*)xx->B->data; 2127 y = (Mat_SeqAIJ*)yy->B->data; 2128 ierr = 
PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2129 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2130 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2131 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2132 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2133 } else { 2134 Mat B; 2135 PetscInt *nnz_d,*nnz_o; 2136 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2137 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2138 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2139 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2140 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2141 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2142 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2143 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2144 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2145 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2146 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2147 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2148 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2149 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2150 } 2151 PetscFunctionReturn(0); 2152 } 2153 2154 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2155 2156 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2157 { 2158 #if defined(PETSC_USE_COMPLEX) 2159 PetscErrorCode ierr; 2160 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2161 2162 PetscFunctionBegin; 2163 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2164 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2165 #else 2166 PetscFunctionBegin; 2167 #endif 2168 PetscFunctionReturn(0); 2169 } 2170 2171 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2172 { 2173 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2174 PetscErrorCode ierr; 2175 2176 PetscFunctionBegin; 2177 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 2178 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2179 PetscFunctionReturn(0); 2180 } 2181 2182 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2183 { 2184 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2185 PetscErrorCode ierr; 2186 2187 PetscFunctionBegin; 2188 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2189 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2190 PetscFunctionReturn(0); 2191 } 2192 2193 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2194 { 2195 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2196 PetscErrorCode ierr; 2197 PetscInt i,*idxb = 0; 2198 PetscScalar *va,*vb; 2199 Vec vtmp; 2200 2201 PetscFunctionBegin; 2202 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2203 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2204 if (idx) { 2205 for (i=0; i<A->rmap->n; i++) { 2206 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2207 } 2208 } 2209 2210 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2211 if (idx) { 2212 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2213 } 2214 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2215 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2216 2217 for (i=0; i<A->rmap->n; i++) { 2218 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2219 va[i] = vb[i]; 2220 if (idx) idx[i] = a->garray[idxb[i]]; 2221 } 2222 } 2223 2224 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2225 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2226 ierr = PetscFree(idxb);CHKERRQ(ierr); 2227 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2228 PetscFunctionReturn(0); 2229 } 2230 2231 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2232 { 2233 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2234 PetscErrorCode ierr; 2235 PetscInt i,*idxb = 0; 2236 PetscScalar *va,*vb; 2237 Vec vtmp; 2238 2239 PetscFunctionBegin; 2240 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2241 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2242 if (idx) { 2243 for (i=0; i<A->cmap->n; i++) { 2244 if 
(PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2245 } 2246 } 2247 2248 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2249 if (idx) { 2250 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2251 } 2252 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2253 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2254 2255 for (i=0; i<A->rmap->n; i++) { 2256 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2257 va[i] = vb[i]; 2258 if (idx) idx[i] = a->garray[idxb[i]]; 2259 } 2260 } 2261 2262 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2263 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2264 ierr = PetscFree(idxb);CHKERRQ(ierr); 2265 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2266 PetscFunctionReturn(0); 2267 } 2268 2269 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2270 { 2271 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2272 PetscInt n = A->rmap->n; 2273 PetscInt cstart = A->cmap->rstart; 2274 PetscInt *cmap = mat->garray; 2275 PetscInt *diagIdx, *offdiagIdx; 2276 Vec diagV, offdiagV; 2277 PetscScalar *a, *diagA, *offdiagA; 2278 PetscInt r; 2279 PetscErrorCode ierr; 2280 2281 PetscFunctionBegin; 2282 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2283 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2284 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2285 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2286 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2287 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2288 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2289 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2290 for (r = 0; r < n; ++r) { 2291 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2292 a[r] = diagA[r]; 2293 idx[r] = cstart + diagIdx[r]; 2294 } else { 2295 a[r] = offdiagA[r]; 2296 idx[r] = cmap[offdiagIdx[r]]; 2297 } 2298 } 2299 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2300 ierr = 
VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2301 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2302 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2303 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2304 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2305 PetscFunctionReturn(0); 2306 } 2307 2308 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2309 { 2310 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2311 PetscInt n = A->rmap->n; 2312 PetscInt cstart = A->cmap->rstart; 2313 PetscInt *cmap = mat->garray; 2314 PetscInt *diagIdx, *offdiagIdx; 2315 Vec diagV, offdiagV; 2316 PetscScalar *a, *diagA, *offdiagA; 2317 PetscInt r; 2318 PetscErrorCode ierr; 2319 2320 PetscFunctionBegin; 2321 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2322 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2323 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2324 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2325 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2326 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2327 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2328 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2329 for (r = 0; r < n; ++r) { 2330 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2331 a[r] = diagA[r]; 2332 idx[r] = cstart + diagIdx[r]; 2333 } else { 2334 a[r] = offdiagA[r]; 2335 idx[r] = cmap[offdiagIdx[r]]; 2336 } 2337 } 2338 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2339 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2340 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2341 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2342 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2343 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2344 PetscFunctionReturn(0); 2345 } 2346 2347 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2348 { 2349 PetscErrorCode ierr; 2350 Mat *dummy; 2351 2352 PetscFunctionBegin; 2353 ierr = 
MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2354 *newmat = *dummy; 2355 ierr = PetscFree(dummy);CHKERRQ(ierr); 2356 PetscFunctionReturn(0); 2357 } 2358 2359 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2360 { 2361 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2362 PetscErrorCode ierr; 2363 2364 PetscFunctionBegin; 2365 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2366 A->factorerrortype = a->A->factorerrortype; 2367 PetscFunctionReturn(0); 2368 } 2369 2370 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2371 { 2372 PetscErrorCode ierr; 2373 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2374 2375 PetscFunctionBegin; 2376 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2377 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2378 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2379 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2384 { 2385 PetscFunctionBegin; 2386 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2387 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2388 PetscFunctionReturn(0); 2389 } 2390 2391 /*@ 2392 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2393 2394 Collective on Mat 2395 2396 Input Parameters: 2397 + A - the matrix 2398 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2399 2400 Level: advanced 2401 2402 @*/ 2403 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2404 { 2405 PetscErrorCode ierr; 2406 2407 PetscFunctionBegin; 2408 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2409 PetscFunctionReturn(0); 2410 } 2411 2412 PetscErrorCode 
MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2413 { 2414 PetscErrorCode ierr; 2415 PetscBool sc = PETSC_FALSE,flg; 2416 2417 PetscFunctionBegin; 2418 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2419 ierr = PetscObjectOptionsBegin((PetscObject)A); 2420 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2421 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2422 if (flg) { 2423 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2424 } 2425 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2426 PetscFunctionReturn(0); 2427 } 2428 2429 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2430 { 2431 PetscErrorCode ierr; 2432 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2433 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2434 2435 PetscFunctionBegin; 2436 if (!Y->preallocated) { 2437 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2438 } else if (!aij->nz) { 2439 PetscInt nonew = aij->nonew; 2440 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2441 aij->nonew = nonew; 2442 } 2443 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2444 PetscFunctionReturn(0); 2445 } 2446 2447 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2448 { 2449 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2450 PetscErrorCode ierr; 2451 2452 PetscFunctionBegin; 2453 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2454 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2455 if (d) { 2456 PetscInt rstart; 2457 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2458 *d += rstart; 2459 2460 } 2461 PetscFunctionReturn(0); 2462 } 2463 2464 2465 /* -------------------------------------------------------------------*/ 2466 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2467 
MatGetRow_MPIAIJ, 2468 MatRestoreRow_MPIAIJ, 2469 MatMult_MPIAIJ, 2470 /* 4*/ MatMultAdd_MPIAIJ, 2471 MatMultTranspose_MPIAIJ, 2472 MatMultTransposeAdd_MPIAIJ, 2473 0, 2474 0, 2475 0, 2476 /*10*/ 0, 2477 0, 2478 0, 2479 MatSOR_MPIAIJ, 2480 MatTranspose_MPIAIJ, 2481 /*15*/ MatGetInfo_MPIAIJ, 2482 MatEqual_MPIAIJ, 2483 MatGetDiagonal_MPIAIJ, 2484 MatDiagonalScale_MPIAIJ, 2485 MatNorm_MPIAIJ, 2486 /*20*/ MatAssemblyBegin_MPIAIJ, 2487 MatAssemblyEnd_MPIAIJ, 2488 MatSetOption_MPIAIJ, 2489 MatZeroEntries_MPIAIJ, 2490 /*24*/ MatZeroRows_MPIAIJ, 2491 0, 2492 0, 2493 0, 2494 0, 2495 /*29*/ MatSetUp_MPIAIJ, 2496 0, 2497 0, 2498 MatGetDiagonalBlock_MPIAIJ, 2499 0, 2500 /*34*/ MatDuplicate_MPIAIJ, 2501 0, 2502 0, 2503 0, 2504 0, 2505 /*39*/ MatAXPY_MPIAIJ, 2506 MatCreateSubMatrices_MPIAIJ, 2507 MatIncreaseOverlap_MPIAIJ, 2508 MatGetValues_MPIAIJ, 2509 MatCopy_MPIAIJ, 2510 /*44*/ MatGetRowMax_MPIAIJ, 2511 MatScale_MPIAIJ, 2512 MatShift_MPIAIJ, 2513 MatDiagonalSet_MPIAIJ, 2514 MatZeroRowsColumns_MPIAIJ, 2515 /*49*/ MatSetRandom_MPIAIJ, 2516 0, 2517 0, 2518 0, 2519 0, 2520 /*54*/ MatFDColoringCreate_MPIXAIJ, 2521 0, 2522 MatSetUnfactored_MPIAIJ, 2523 MatPermute_MPIAIJ, 2524 0, 2525 /*59*/ MatCreateSubMatrix_MPIAIJ, 2526 MatDestroy_MPIAIJ, 2527 MatView_MPIAIJ, 2528 0, 2529 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2530 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2531 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2532 0, 2533 0, 2534 0, 2535 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2536 MatGetRowMinAbs_MPIAIJ, 2537 0, 2538 0, 2539 0, 2540 0, 2541 /*75*/ MatFDColoringApply_AIJ, 2542 MatSetFromOptions_MPIAIJ, 2543 0, 2544 0, 2545 MatFindZeroDiagonals_MPIAIJ, 2546 /*80*/ 0, 2547 0, 2548 0, 2549 /*83*/ MatLoad_MPIAIJ, 2550 0, 2551 0, 2552 0, 2553 0, 2554 0, 2555 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2556 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2557 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2558 MatPtAP_MPIAIJ_MPIAIJ, 2559 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2560 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2561 0, 2562 0, 2563 0, 
2564 0, 2565 /*99*/ 0, 2566 0, 2567 0, 2568 MatConjugate_MPIAIJ, 2569 0, 2570 /*104*/MatSetValuesRow_MPIAIJ, 2571 MatRealPart_MPIAIJ, 2572 MatImaginaryPart_MPIAIJ, 2573 0, 2574 0, 2575 /*109*/0, 2576 0, 2577 MatGetRowMin_MPIAIJ, 2578 0, 2579 MatMissingDiagonal_MPIAIJ, 2580 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2581 0, 2582 MatGetGhosts_MPIAIJ, 2583 0, 2584 0, 2585 /*119*/0, 2586 0, 2587 0, 2588 0, 2589 MatGetMultiProcBlock_MPIAIJ, 2590 /*124*/MatFindNonzeroRows_MPIAIJ, 2591 MatGetColumnNorms_MPIAIJ, 2592 MatInvertBlockDiagonal_MPIAIJ, 2593 0, 2594 MatCreateSubMatricesMPI_MPIAIJ, 2595 /*129*/0, 2596 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2597 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2598 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2599 0, 2600 /*134*/0, 2601 0, 2602 MatRARt_MPIAIJ_MPIAIJ, 2603 0, 2604 0, 2605 /*139*/MatSetBlockSizes_MPIAIJ, 2606 0, 2607 0, 2608 MatFDColoringSetUp_MPIXAIJ, 2609 MatFindOffBlockDiagonalEntries_MPIAIJ, 2610 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2611 }; 2612 2613 /* ----------------------------------------------------------------------------------------*/ 2614 2615 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2616 { 2617 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2618 PetscErrorCode ierr; 2619 2620 PetscFunctionBegin; 2621 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2622 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2623 PetscFunctionReturn(0); 2624 } 2625 2626 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2627 { 2628 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2629 PetscErrorCode ierr; 2630 2631 PetscFunctionBegin; 2632 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2633 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2634 PetscFunctionReturn(0); 2635 } 2636 2637 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2638 { 2639 Mat_MPIAIJ *b; 2640 PetscErrorCode ierr; 2641 2642 PetscFunctionBegin; 2643 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 
2644 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2645 b = (Mat_MPIAIJ*)B->data; 2646 2647 #if defined(PETSC_USE_CTABLE) 2648 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2649 #else 2650 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2651 #endif 2652 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2653 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2654 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2655 2656 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2657 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2658 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2659 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2660 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2661 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2662 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2663 2664 if (!B->preallocated) { 2665 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2666 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2667 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2668 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2669 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2670 } 2671 2672 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2673 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2674 B->preallocated = PETSC_TRUE; 2675 B->was_assembled = PETSC_FALSE; 2676 B->assembled = PETSC_FALSE;; 2677 PetscFunctionReturn(0); 2678 } 2679 2680 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2681 { 2682 Mat_MPIAIJ *b; 2683 PetscErrorCode ierr; 2684 2685 PetscFunctionBegin; 2686 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2687 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2688 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2689 b = (Mat_MPIAIJ*)B->data; 2690 2691 #if defined(PETSC_USE_CTABLE) 2692 ierr = 
PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2693 #else 2694 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2695 #endif 2696 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2697 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2698 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2699 2700 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2701 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2702 B->preallocated = PETSC_TRUE; 2703 B->was_assembled = PETSC_FALSE; 2704 B->assembled = PETSC_FALSE; 2705 PetscFunctionReturn(0); 2706 } 2707 2708 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2709 { 2710 Mat mat; 2711 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2712 PetscErrorCode ierr; 2713 2714 PetscFunctionBegin; 2715 *newmat = 0; 2716 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2717 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2718 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2719 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2720 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2721 a = (Mat_MPIAIJ*)mat->data; 2722 2723 mat->factortype = matin->factortype; 2724 mat->assembled = PETSC_TRUE; 2725 mat->insertmode = NOT_SET_VALUES; 2726 mat->preallocated = PETSC_TRUE; 2727 2728 a->size = oldmat->size; 2729 a->rank = oldmat->rank; 2730 a->donotstash = oldmat->donotstash; 2731 a->roworiented = oldmat->roworiented; 2732 a->rowindices = 0; 2733 a->rowvalues = 0; 2734 a->getrowactive = PETSC_FALSE; 2735 2736 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2737 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2738 2739 if (oldmat->colmap) { 2740 #if defined(PETSC_USE_CTABLE) 2741 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2742 #else 2743 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2744 ierr = 
PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2745 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2746 #endif 2747 } else a->colmap = 0; 2748 if (oldmat->garray) { 2749 PetscInt len; 2750 len = oldmat->B->cmap->n; 2751 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2752 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2753 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2754 } else a->garray = 0; 2755 2756 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2757 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2758 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2759 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2760 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2761 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2762 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2763 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2764 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2765 *newmat = mat; 2766 PetscFunctionReturn(0); 2767 } 2768 2769 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2770 { 2771 PetscScalar *vals,*svals; 2772 MPI_Comm comm; 2773 PetscErrorCode ierr; 2774 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2775 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2776 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2777 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2778 PetscInt cend,cstart,n,*rowners; 2779 int fd; 2780 PetscInt bs = newMat->rmap->bs; 2781 2782 PetscFunctionBegin; 2783 /* force binary viewer to load .info file if it has not yet done so */ 2784 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2785 
ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2786 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2787 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2788 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2789 if (!rank) { 2790 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2791 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2792 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2793 } 2794 2795 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2796 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2797 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2798 if (bs < 0) bs = 1; 2799 2800 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2801 M = header[1]; N = header[2]; 2802 2803 /* If global sizes are set, check if they are consistent with that given in the file */ 2804 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2805 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2806 2807 /* determine ownership of all (block) rows */ 2808 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2809 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2810 else m = newMat->rmap->n; /* Set by user */ 2811 2812 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2813 ierr = 
MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2814 2815 /* First process needs enough room for process with most rows */ 2816 if (!rank) { 2817 mmax = rowners[1]; 2818 for (i=2; i<=size; i++) { 2819 mmax = PetscMax(mmax, rowners[i]); 2820 } 2821 } else mmax = -1; /* unused, but compilers complain */ 2822 2823 rowners[0] = 0; 2824 for (i=2; i<=size; i++) { 2825 rowners[i] += rowners[i-1]; 2826 } 2827 rstart = rowners[rank]; 2828 rend = rowners[rank+1]; 2829 2830 /* distribute row lengths to all processors */ 2831 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2832 if (!rank) { 2833 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2834 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2835 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2836 for (j=0; j<m; j++) { 2837 procsnz[0] += ourlens[j]; 2838 } 2839 for (i=1; i<size; i++) { 2840 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2841 /* calculate the number of nonzeros on each processor */ 2842 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2843 procsnz[i] += rowlengths[j]; 2844 } 2845 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2846 } 2847 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2848 } else { 2849 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2850 } 2851 2852 if (!rank) { 2853 /* determine max buffer needed and allocate it */ 2854 maxnz = 0; 2855 for (i=0; i<size; i++) { 2856 maxnz = PetscMax(maxnz,procsnz[i]); 2857 } 2858 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2859 2860 /* read in my part of the matrix column indices */ 2861 nz = procsnz[0]; 2862 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2863 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2864 2865 /* read in every one elses and ship off */ 2866 for (i=1; i<size; i++) { 2867 nz = procsnz[i]; 2868 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2869 ierr = 
MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2870 } 2871 ierr = PetscFree(cols);CHKERRQ(ierr); 2872 } else { 2873 /* determine buffer space needed for message */ 2874 nz = 0; 2875 for (i=0; i<m; i++) { 2876 nz += ourlens[i]; 2877 } 2878 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2879 2880 /* receive message of column indices*/ 2881 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2882 } 2883 2884 /* determine column ownership if matrix is not square */ 2885 if (N != M) { 2886 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2887 else n = newMat->cmap->n; 2888 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2889 cstart = cend - n; 2890 } else { 2891 cstart = rstart; 2892 cend = rend; 2893 n = cend - cstart; 2894 } 2895 2896 /* loop over local rows, determining number of off diagonal entries */ 2897 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2898 jj = 0; 2899 for (i=0; i<m; i++) { 2900 for (j=0; j<ourlens[i]; j++) { 2901 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2902 jj++; 2903 } 2904 } 2905 2906 for (i=0; i<m; i++) { 2907 ourlens[i] -= offlens[i]; 2908 } 2909 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2910 2911 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2912 2913 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2914 2915 for (i=0; i<m; i++) { 2916 ourlens[i] += offlens[i]; 2917 } 2918 2919 if (!rank) { 2920 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2921 2922 /* read in my part of the matrix numerical values */ 2923 nz = procsnz[0]; 2924 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2925 2926 /* insert into matrix */ 2927 jj = rstart; 2928 smycols = mycols; 2929 svals = vals; 2930 for (i=0; i<m; i++) { 2931 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2932 smycols += ourlens[i]; 2933 svals += ourlens[i]; 2934 jj++; 2935 } 2936 2937 /* read 
in other processors and ship out */ 2938 for (i=1; i<size; i++) { 2939 nz = procsnz[i]; 2940 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2941 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2942 } 2943 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2944 } else { 2945 /* receive numeric values */ 2946 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 2947 2948 /* receive message of values*/ 2949 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2950 2951 /* insert into matrix */ 2952 jj = rstart; 2953 smycols = mycols; 2954 svals = vals; 2955 for (i=0; i<m; i++) { 2956 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2957 smycols += ourlens[i]; 2958 svals += ourlens[i]; 2959 jj++; 2960 } 2961 } 2962 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2963 ierr = PetscFree(vals);CHKERRQ(ierr); 2964 ierr = PetscFree(mycols);CHKERRQ(ierr); 2965 ierr = PetscFree(rowners);CHKERRQ(ierr); 2966 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2967 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2968 PetscFunctionReturn(0); 2969 } 2970 2971 /* Not scalable because of ISAllGather() unless getting all columns. 
*/
/*
   ISGetSeqIS_Private - Builds the index set *isseq holding all of iscol's entries.

   When every process passes a unit-step stride IS that exactly matches its column ownership
   range of mat, the result is the identity stride 0..N-1 and ISAllGather() is skipped;
   otherwise iscol is gathered with ISAllGather() and its block size is carried over.
*/
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscBool      isstride;
  PetscMPIInt    lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);

  if (isstride) {
    PetscInt  start,step,len,mstart,mlen;
    ierr = ISStrideGetInfo(iscol,&start,&step);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
    /* mlen receives the END of the local column ownership range, not its length */
    ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
    /* the step must be 1 (it is irrelevant for fewer than two entries), otherwise a stride with
       matching start and length would wrongly be treated as "all local columns" */
    if (mstart == start && mlen-mstart == len && (step == 1 || len < 2)) lisstride = 1;
  }

  /* the shortcut is only valid if it holds on every process */
  ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (gisstride) {
    PetscInt N;
    ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
    ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameter:
   isrow_d,iscol_d - sequential
row and column index sets for retrieving mat->A 3021 iscol_o - sequential column index set for retrieving mat->B 3022 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3023 */ 3024 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3025 { 3026 PetscErrorCode ierr; 3027 Vec x,cmap; 3028 const PetscInt *is_idx; 3029 PetscScalar *xarray,*cmaparray; 3030 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3031 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3032 Mat B=a->B; 3033 Vec lvec=a->lvec,lcmap; 3034 PetscInt i,cstart,cend,Bn=B->cmap->N; 3035 MPI_Comm comm; 3036 3037 PetscFunctionBegin; 3038 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3039 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3040 3041 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3042 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3043 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3044 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3045 3046 /* Get start indices */ 3047 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3048 isstart -= ncols; 3049 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3050 3051 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3052 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3053 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3054 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3055 for (i=0; i<ncols; i++) { 3056 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3057 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3058 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3059 } 3060 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3061 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3062 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3063 3064 /* Get iscol_d */ 3065 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3066 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3067 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3068 3069 /* Get isrow_d */ 3070 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3071 rstart = mat->rmap->rstart; 3072 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3073 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3074 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3075 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3076 3077 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3078 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3079 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3080 3081 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3082 ierr = VecScatterBegin(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3083 3084 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3085 3086 ierr = VecScatterEnd(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3087 ierr = VecScatterBegin(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3088 ierr = VecScatterEnd(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3089 3090 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3091 /* off-process column indices */ 3092 count = 0; 3093 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3094 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3095 3096 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3097 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3098 for (i=0; i<Bn; i++) { 3099 if (PetscRealPart(xarray[i]) > -1.0) { 3100 idx[count] = i; /* local column index in off-diagonal part B */ 3101 cmap1[count++] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3102 } 3103 } 3104 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3105 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3106 3107 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3108 /* cannot ensure iscol_o has same blocksize as iscol! */ 3109 3110 ierr = PetscFree(idx);CHKERRQ(ierr); 3111 3112 *garray = cmap1; 3113 3114 ierr = VecDestroy(&x);CHKERRQ(ierr); 3115 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3116 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3117 PetscFunctionReturn(0); 3118 } 3119 3120 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3121 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3122 { 3123 PetscErrorCode ierr; 3124 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3125 Mat M = NULL; 3126 MPI_Comm comm; 3127 IS iscol_d,isrow_d,iscol_o; 3128 Mat Asub = NULL,Bsub = NULL; 3129 PetscInt n; 3130 3131 PetscFunctionBegin; 3132 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3133 3134 if (call == MAT_REUSE_MATRIX) { 3135 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3136 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3137 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3138 3139 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3140 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3141 3142 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3143 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3144 3145 /* Update diagonal and off-diagonal portions of submat */ 3146 asub = (Mat_MPIAIJ*)(*submat)->data; 3147 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3148 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3149 if 
(n) { 3150 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3151 } 3152 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3153 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3154 3155 } else { /* call == MAT_INITIAL_MATRIX) */ 3156 const PetscInt *garray; 3157 PetscInt BsubN; 3158 3159 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3160 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3161 3162 /* Create local submatrices Asub and Bsub */ 3163 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3164 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3165 3166 /* Create submatrix M */ 3167 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3168 3169 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3170 asub = (Mat_MPIAIJ*)M->data; 3171 3172 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3173 n = asub->B->cmap->N; 3174 if (BsubN > n) { 3175 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3176 const PetscInt *idx; 3177 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3178 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3179 3180 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3181 j = 0; 3182 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3183 for (i=0; i<n; i++) { 3184 if (j >= BsubN) break; 3185 while (subgarray[i] > garray[j]) j++; 3186 3187 if (subgarray[i] == garray[j]) { 3188 idx_new[i] = idx[j++]; 3189 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3190 } 3191 ierr = 
ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3192 3193 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3194 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3195 3196 } else if (BsubN < n) { 3197 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3198 } 3199 3200 ierr = PetscFree(garray);CHKERRQ(ierr); 3201 *submat = M; 3202 3203 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3204 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3205 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3206 3207 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3208 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3209 3210 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3211 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3212 } 3213 PetscFunctionReturn(0); 3214 } 3215 3216 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3217 { 3218 PetscErrorCode ierr; 3219 IS iscol_local,isrow_d; 3220 PetscInt csize; 3221 PetscInt n,i,j,start,end; 3222 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3223 MPI_Comm comm; 3224 3225 PetscFunctionBegin; 3226 /* If isrow has same processor distribution as mat, 3227 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3228 if (call == MAT_REUSE_MATRIX) { 3229 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3230 if (isrow_d) { 3231 sameRowDist = PETSC_TRUE; 3232 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3233 } else { 3234 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3235 if (iscol_local) { 3236 sameRowDist = PETSC_TRUE; 3237 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3238 } 3239 } 3240 } else { 3241 /* Check if isrow 
has same processor distribution as mat */ 3242 sameDist[0] = PETSC_FALSE; 3243 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3244 if (!n) { 3245 sameDist[0] = PETSC_TRUE; 3246 } else { 3247 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3248 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3249 if (i >= start && j < end) { 3250 sameDist[0] = PETSC_TRUE; 3251 } 3252 } 3253 3254 /* Check if iscol has same processor distribution as mat */ 3255 sameDist[1] = PETSC_FALSE; 3256 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3257 if (!n) { 3258 sameDist[1] = PETSC_TRUE; 3259 } else { 3260 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3261 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3262 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3263 } 3264 3265 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3266 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3267 sameRowDist = tsameDist[0]; 3268 } 3269 3270 if (sameRowDist) { 3271 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3272 /* isrow and iscol have same processor distribution as mat */ 3273 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3274 } else { /* sameRowDist */ 3275 /* isrow has same processor distribution as mat */ 3276 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3277 } 3278 PetscFunctionReturn(0); 3279 } 3280 3281 /* General case: iscol -> iscol_local which has global size of iscol */ 3282 if (call == MAT_REUSE_MATRIX) { 3283 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3284 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3285 } else { 3286 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3287 } 3288 3289 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3290 ierr = 
MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3291 3292 if (call == MAT_INITIAL_MATRIX) { 3293 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3294 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3295 } 3296 PetscFunctionReturn(0); 3297 } 3298 3299 /*@C 3300 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3301 and "off-diagonal" part of the matrix in CSR format. 3302 3303 Collective on MPI_Comm 3304 3305 Input Parameters: 3306 + comm - MPI communicator 3307 . A - "diagonal" portion of matrix 3308 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3309 - garray - global index of B columns 3310 3311 Output Parameter: 3312 . mat - the matrix, with input A as its local diagonal matrix 3313 Level: advanced 3314 3315 Notes: 3316 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3317 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 
3318 3319 .seealso: MatCreateMPIAIJWithSplitArrays() 3320 @*/ 3321 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3322 { 3323 PetscErrorCode ierr; 3324 Mat_MPIAIJ *maij; 3325 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3326 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3327 PetscScalar *oa=b->a; 3328 Mat Bnew; 3329 PetscInt m,n,N; 3330 3331 PetscFunctionBegin; 3332 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3333 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3334 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3335 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3336 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3337 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3338 3339 /* Get global columns of mat */ 3340 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3341 3342 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3343 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3344 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3345 maij = (Mat_MPIAIJ*)(*mat)->data; 3346 3347 (*mat)->preallocated = PETSC_TRUE; 3348 3349 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3350 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3351 3352 /* Set A as diagonal portion of *mat */ 3353 maij->A = A; 3354 3355 nz = oi[m]; 3356 for (i=0; i<nz; i++) { 3357 col = oj[i]; 3358 oj[i] = garray[col]; 3359 } 3360 3361 /* Set Bnew as off-diagonal portion of *mat */ 3362 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3363 bnew = (Mat_SeqAIJ*)Bnew->data; 3364 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3365 maij->B = Bnew; 3366 3367 if 
(B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3368 3369 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3370 b->free_a = PETSC_FALSE; 3371 b->free_ij = PETSC_FALSE; 3372 ierr = MatDestroy(&B);CHKERRQ(ierr); 3373 3374 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3375 bnew->free_a = PETSC_TRUE; 3376 bnew->free_ij = PETSC_TRUE; 3377 3378 /* condense columns of maij->B */ 3379 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3380 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3381 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3382 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3383 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3384 PetscFunctionReturn(0); 3385 } 3386 3387 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3388 3389 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3390 { 3391 PetscErrorCode ierr; 3392 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3393 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3394 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3395 Mat M,Msub,B=a->B; 3396 MatScalar *aa; 3397 Mat_SeqAIJ *aij; 3398 PetscInt *garray = a->garray,*colsub,Ncols; 3399 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3400 IS iscol_sub,iscmap; 3401 const PetscInt *is_idx,*cmap; 3402 PetscBool allcolumns=PETSC_FALSE; 3403 IS iscol_local=NULL; 3404 MPI_Comm comm; 3405 3406 PetscFunctionBegin; 3407 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3408 3409 if (call == MAT_REUSE_MATRIX) { 3410 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3411 if (!iscol_sub) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3412 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3413 3414 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3415 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3416 3417 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3418 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3419 3420 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3421 3422 } else { /* call == MAT_INITIAL_MATRIX) */ 3423 PetscBool flg; 3424 3425 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3426 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3427 3428 /* (1) iscol -> nonscalable iscol_local */ 3429 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3430 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3431 if (n != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != Ncols %d",n,Ncols); 3432 3433 /* Check for special case: each processor gets entire matrix columns */ 3434 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3435 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3436 if (allcolumns) { 3437 iscol_sub = iscol_local; 3438 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3439 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3440 3441 } else { 3442 /* (2) iscol_local -> iscol_sub and iscmap */ 3443 PetscInt *idx,*cmap1,k; 3444 3445 /* implementation below requires iscol_local be sorted, it can have duplicate indices */ 3446 ierr = ISSorted(iscol_local,&flg);CHKERRQ(ierr); 3447 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unsorted iscol_local is not 
implemented yet"); 3448 3449 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3450 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3451 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3452 count = 0; 3453 k = 0; 3454 for (i=0; i<Ncols; i++) { 3455 j = is_idx[i]; 3456 if (j >= cstart && j < cend) { 3457 /* diagonal part of mat */ 3458 idx[count] = j; 3459 cmap1[count++] = i; /* column index in submat */ 3460 } else if (Bn) { 3461 /* off-diagonal part of mat */ 3462 if (j == garray[k]) { 3463 idx[count] = j; 3464 cmap1[count++] = i; /* column index in submat */ 3465 } else if (j > garray[k]) { 3466 while (j > garray[k] && k < Bn-1) k++; 3467 if (j == garray[k]) { 3468 idx[count] = j; 3469 cmap1[count++] = i; /* column index in submat */ 3470 } 3471 } 3472 } 3473 } 3474 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3475 3476 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3477 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3478 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3479 3480 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3481 } 3482 3483 /* (3) Create sequential Msub */ 3484 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3485 } 3486 3487 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3488 aij = (Mat_SeqAIJ*)(Msub)->data; 3489 ii = aij->i; 3490 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3491 3492 /* 3493 m - number of local rows 3494 Ncols - number of columns (same on all processors) 3495 rstart - first row in new global matrix generated 3496 */ 3497 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3498 3499 if (call == MAT_INITIAL_MATRIX) { 3500 /* (4) Create parallel newmat */ 3501 PetscMPIInt rank,size; 3502 PetscInt csize; 3503 3504 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3505 ierr = 
MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3506 3507 /* 3508 Determine the number of non-zeros in the diagonal and off-diagonal 3509 portions of the matrix in order to do correct preallocation 3510 */ 3511 3512 /* first get start and end of "diagonal" columns */ 3513 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3514 if (csize == PETSC_DECIDE) { 3515 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3516 if (mglobal == Ncols) { /* square matrix */ 3517 nlocal = m; 3518 } else { 3519 nlocal = Ncols/size + ((Ncols % size) > rank); 3520 } 3521 } else { 3522 nlocal = csize; 3523 } 3524 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3525 rstart = rend - nlocal; 3526 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3527 3528 /* next, compute all the lengths */ 3529 jj = aij->j; 3530 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3531 olens = dlens + m; 3532 for (i=0; i<m; i++) { 3533 jend = ii[i+1] - ii[i]; 3534 olen = 0; 3535 dlen = 0; 3536 for (j=0; j<jend; j++) { 3537 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3538 else dlen++; 3539 jj++; 3540 } 3541 olens[i] = olen; 3542 dlens[i] = dlen; 3543 } 3544 3545 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3546 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3547 3548 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3549 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3550 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3551 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3552 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3553 ierr = PetscFree(dlens);CHKERRQ(ierr); 3554 3555 } else { /* call == MAT_REUSE_MATRIX */ 3556 M = *newmat; 3557 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3558 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3559 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3560 /* 3561 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3562 rather than the slower MatSetValues(). 3563 */ 3564 M->was_assembled = PETSC_TRUE; 3565 M->assembled = PETSC_FALSE; 3566 } 3567 3568 /* (5) Set values of Msub to *newmat */ 3569 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3570 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3571 3572 jj = aij->j; 3573 aa = aij->a; 3574 for (i=0; i<m; i++) { 3575 row = rstart + i; 3576 nz = ii[i+1] - ii[i]; 3577 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3578 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3579 jj += nz; aa += nz; 3580 } 3581 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3582 3583 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3584 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3585 3586 ierr = PetscFree(colsub);CHKERRQ(ierr); 3587 3588 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3589 if (call == MAT_INITIAL_MATRIX) { 3590 *newmat = M; 3591 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3592 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3593 3594 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3595 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3596 3597 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3598 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3599 3600 if (iscol_local) { 3601 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3602 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3603 } 3604 } 3605 PetscFunctionReturn(0); 3606 } 3607 3608 /* 3609 Not great since it makes two copies of the submatrix, first an SeqAIJ 3610 in local and then by concatenating the local matrices the end result. 
3611 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3612 3613 Note: This requires a sequential iscol with all indices. 3614 */ 3615 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3616 { 3617 PetscErrorCode ierr; 3618 PetscMPIInt rank,size; 3619 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3620 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3621 Mat M,Mreuse; 3622 MatScalar *aa,*vwork; 3623 MPI_Comm comm; 3624 Mat_SeqAIJ *aij; 3625 PetscBool colflag,allcolumns=PETSC_FALSE; 3626 3627 PetscFunctionBegin; 3628 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3629 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3630 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3631 3632 /* Check for special case: each processor gets entire matrix columns */ 3633 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3634 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3635 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3636 3637 if (call == MAT_REUSE_MATRIX) { 3638 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3639 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3640 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3641 } else { 3642 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3643 } 3644 3645 /* 3646 m - number of local rows 3647 n - number of columns (same on all processors) 3648 rstart - first row in new global matrix generated 3649 */ 3650 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3651 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3652 if (call == MAT_INITIAL_MATRIX) { 3653 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3654 ii = aij->i; 3655 jj = aij->j; 3656 3657 /* 3658 
Determine the number of non-zeros in the diagonal and off-diagonal 3659 portions of the matrix in order to do correct preallocation 3660 */ 3661 3662 /* first get start and end of "diagonal" columns */ 3663 if (csize == PETSC_DECIDE) { 3664 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3665 if (mglobal == n) { /* square matrix */ 3666 nlocal = m; 3667 } else { 3668 nlocal = n/size + ((n % size) > rank); 3669 } 3670 } else { 3671 nlocal = csize; 3672 } 3673 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3674 rstart = rend - nlocal; 3675 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3676 3677 /* next, compute all the lengths */ 3678 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3679 olens = dlens + m; 3680 for (i=0; i<m; i++) { 3681 jend = ii[i+1] - ii[i]; 3682 olen = 0; 3683 dlen = 0; 3684 for (j=0; j<jend; j++) { 3685 if (*jj < rstart || *jj >= rend) olen++; 3686 else dlen++; 3687 jj++; 3688 } 3689 olens[i] = olen; 3690 dlens[i] = dlen; 3691 } 3692 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3693 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3694 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3695 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3696 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3697 ierr = PetscFree(dlens);CHKERRQ(ierr); 3698 } else { 3699 PetscInt ml,nl; 3700 3701 M = *newmat; 3702 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3703 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3704 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3705 /* 3706 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3707 rather than the slower MatSetValues(). 
3708 */ 3709 M->was_assembled = PETSC_TRUE; 3710 M->assembled = PETSC_FALSE; 3711 } 3712 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3713 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3714 ii = aij->i; 3715 jj = aij->j; 3716 aa = aij->a; 3717 for (i=0; i<m; i++) { 3718 row = rstart + i; 3719 nz = ii[i+1] - ii[i]; 3720 cwork = jj; jj += nz; 3721 vwork = aa; aa += nz; 3722 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3723 } 3724 3725 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3726 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3727 *newmat = M; 3728 3729 /* save submatrix used in processor for next request */ 3730 if (call == MAT_INITIAL_MATRIX) { 3731 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3732 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3733 } 3734 PetscFunctionReturn(0); 3735 } 3736 3737 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3738 { 3739 PetscInt m,cstart, cend,j,nnz,i,d; 3740 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3741 const PetscInt *JJ; 3742 PetscScalar *values; 3743 PetscErrorCode ierr; 3744 PetscBool nooffprocentries; 3745 3746 PetscFunctionBegin; 3747 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3748 3749 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3750 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3751 m = B->rmap->n; 3752 cstart = B->cmap->rstart; 3753 cend = B->cmap->rend; 3754 rstart = B->rmap->rstart; 3755 3756 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3757 3758 #if defined(PETSC_USE_DEBUGGING) 3759 for (i=0; i<m; i++) { 3760 nnz = Ii[i+1]- Ii[i]; 3761 JJ = J + Ii[i]; 3762 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3763 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative 
column index",i,j); 3764 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3765 } 3766 #endif 3767 3768 for (i=0; i<m; i++) { 3769 nnz = Ii[i+1]- Ii[i]; 3770 JJ = J + Ii[i]; 3771 nnz_max = PetscMax(nnz_max,nnz); 3772 d = 0; 3773 for (j=0; j<nnz; j++) { 3774 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3775 } 3776 d_nnz[i] = d; 3777 o_nnz[i] = nnz - d; 3778 } 3779 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3780 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3781 3782 if (v) values = (PetscScalar*)v; 3783 else { 3784 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3785 } 3786 3787 for (i=0; i<m; i++) { 3788 ii = i + rstart; 3789 nnz = Ii[i+1]- Ii[i]; 3790 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3791 } 3792 nooffprocentries = B->nooffprocentries; 3793 B->nooffprocentries = PETSC_TRUE; 3794 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3795 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3796 B->nooffprocentries = nooffprocentries; 3797 3798 if (!v) { 3799 ierr = PetscFree(values);CHKERRQ(ierr); 3800 } 3801 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3802 PetscFunctionReturn(0); 3803 } 3804 3805 /*@ 3806 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3807 (the default parallel PETSc format). 3808 3809 Collective on MPI_Comm 3810 3811 Input Parameters: 3812 + B - the matrix 3813 . i - the indices into j for the start of each local row (starts with zero) 3814 . 
j - the column indices for each local row (starts with zero) 3815 - v - optional values in the matrix 3816 3817 Level: developer 3818 3819 Notes: 3820 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3821 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3822 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3823 3824 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3825 3826 The format which is used for the sparse matrix input, is equivalent to a 3827 row-major ordering.. i.e for the following matrix, the input data expected is 3828 as shown 3829 3830 $ 1 0 0 3831 $ 2 0 3 P0 3832 $ ------- 3833 $ 4 5 6 P1 3834 $ 3835 $ Process0 [P0]: rows_owned=[0,1] 3836 $ i = {0,1,3} [size = nrow+1 = 2+1] 3837 $ j = {0,0,2} [size = 3] 3838 $ v = {1,2,3} [size = 3] 3839 $ 3840 $ Process1 [P1]: rows_owned=[2] 3841 $ i = {0,3} [size = nrow+1 = 1+1] 3842 $ j = {0,1,2} [size = 3] 3843 $ v = {4,5,6} [size = 3] 3844 3845 .keywords: matrix, aij, compressed row, sparse, parallel 3846 3847 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3848 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3849 @*/ 3850 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3851 { 3852 PetscErrorCode ierr; 3853 3854 PetscFunctionBegin; 3855 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3856 PetscFunctionReturn(0); 3857 } 3858 3859 /*@C 3860 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3861 (the default parallel PETSc format). 
For good matrix assembly performance 3862 the user should preallocate the matrix storage by setting the parameters 3863 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3864 performance can be increased by more than a factor of 50. 3865 3866 Collective on MPI_Comm 3867 3868 Input Parameters: 3869 + B - the matrix 3870 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3871 (same value is used for all local rows) 3872 . d_nnz - array containing the number of nonzeros in the various rows of the 3873 DIAGONAL portion of the local submatrix (possibly different for each row) 3874 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3875 The size of this array is equal to the number of local rows, i.e 'm'. 3876 For matrices that will be factored, you must leave room for (and set) 3877 the diagonal entry even if it is zero. 3878 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3879 submatrix (same value is used for all local rows). 3880 - o_nnz - array containing the number of nonzeros in the various rows of the 3881 OFF-DIAGONAL portion of the local submatrix (possibly different for 3882 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3883 structure. The size of this array is equal to the number 3884 of local rows, i.e 'm'. 3885 3886 If the *_nnz parameter is given then the *_nz parameter is ignored 3887 3888 The AIJ format (also called the Yale sparse matrix format or 3889 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3890 storage. The stored row and column indices begin with zero. 3891 See Users-Manual: ch_mat for details. 3892 3893 The parallel matrix is partitioned such that the first m0 rows belong to 3894 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3895 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.
   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* B must be a Mat whose type has already been set */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* forward to the implementation composed with B under the name "MatMPIAIJSetPreallocation_C" */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
         CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices
.   j - column indices
-   a - matrix values

   Output Parameter:
.   mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4030 4031 The format which is used for the sparse matrix input, is equivalent to a 4032 row-major ordering.. i.e for the following matrix, the input data expected is 4033 as shown 4034 4035 $ 1 0 0 4036 $ 2 0 3 P0 4037 $ ------- 4038 $ 4 5 6 P1 4039 $ 4040 $ Process0 [P0]: rows_owned=[0,1] 4041 $ i = {0,1,3} [size = nrow+1 = 2+1] 4042 $ j = {0,0,2} [size = 3] 4043 $ v = {1,2,3} [size = 3] 4044 $ 4045 $ Process1 [P1]: rows_owned=[2] 4046 $ i = {0,3} [size = nrow+1 = 1+1] 4047 $ j = {0,1,2} [size = 3] 4048 $ v = {4,5,6} [size = 3] 4049 4050 .keywords: matrix, aij, compressed row, sparse, parallel 4051 4052 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4053 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4054 @*/ 4055 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4056 { 4057 PetscErrorCode ierr; 4058 4059 PetscFunctionBegin; 4060 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4061 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4062 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4063 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4064 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4065 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4066 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4067 PetscFunctionReturn(0); 4068 } 4069 4070 /*@C 4071 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4072 (the default parallel PETSc format). For good matrix assembly performance 4073 the user should preallocate the matrix storage by setting the parameters 4074 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4075 performance can be increased by more than a factor of 50. 
   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4108 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4109 4110 Notes: 4111 If the *_nnz parameter is given then the *_nz parameter is ignored 4112 4113 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4114 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4115 storage requirements for this matrix. 4116 4117 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4118 processor then it must be used on all processors that share the object for 4119 that argument. 4120 4121 The user MUST specify either the local or global matrix dimensions 4122 (possibly both). 4123 4124 The parallel matrix is partitioned across processors such that the 4125 first m0 rows belong to process 0, the next m1 rows belong to 4126 process 1, the next m2 rows belong to process 2 etc.. where 4127 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4128 values corresponding to [m x N] submatrix. 4129 4130 The columns are logically partitioned with the n0 columns belonging 4131 to 0th partition, the next n1 columns belonging to the next 4132 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4133 4134 The DIAGONAL portion of the local submatrix on any given processor 4135 is the submatrix corresponding to the rows and columns m,n 4136 corresponding to the given processor. i.e diagonal matrix on 4137 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4138 etc. The remaining portion of the local submatrix [m x (N-n)] 4139 constitutes the OFF-DIAGONAL portion. The example below better 4140 illustrates this concept. 4141 4142 For a square global matrix we define each processor's diagonal portion 4143 to be its local rows and the corresponding columns (a square submatrix); 4144 each processor's off-diagonal portion encompasses the remainder of the 4145 local matrix (a rectangular submatrix). 4146 4147 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
4148 4149 When calling this routine with a single process communicator, a matrix of 4150 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4151 type of communicator, use the construction mechanism 4152 .vb 4153 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4154 .ve 4155 4156 $ MatCreate(...,&A); 4157 $ MatSetType(A,MATMPIAIJ); 4158 $ MatSetSizes(A, m,n,M,N); 4159 $ MatMPIAIJSetPreallocation(A,...); 4160 4161 By default, this format uses inodes (identical nodes) when possible. 4162 We search for consecutive rows with the same nonzero structure, thereby 4163 reusing matrix information to achieve increased efficiency. 4164 4165 Options Database Keys: 4166 + -mat_no_inode - Do not use inodes 4167 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4168 - -mat_aij_oneindex - Internally use indexing starting at 1 4169 rather than 0. Note that when calling MatSetValues(), 4170 the user still MUST index entries starting at 0! 4171 4172 4173 Example usage: 4174 4175 Consider the following 8x8 matrix with 34 non-zero values, that is 4176 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4177 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4178 as follows 4179 4180 .vb 4181 1 2 0 | 0 3 0 | 0 4 4182 Proc0 0 5 6 | 7 0 0 | 8 0 4183 9 0 10 | 11 0 0 | 12 0 4184 ------------------------------------- 4185 13 0 14 | 15 16 17 | 0 0 4186 Proc1 0 18 0 | 19 20 21 | 0 0 4187 0 0 0 | 22 23 0 | 24 0 4188 ------------------------------------- 4189 Proc2 25 26 27 | 0 0 28 | 29 0 4190 30 0 0 | 31 32 33 | 0 34 4191 .ve 4192 4193 This can be represented as a collection of submatrices as 4194 4195 .vb 4196 A B C 4197 D E F 4198 G H I 4199 .ve 4200 4201 Where the submatrices A,B,C are owned by proc0, D,E,F are 4202 owned by proc1, G,H,I are owned by proc2. 4203 4204 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 
4205 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4206 The 'M','N' parameters are 8,8, and have the same values on all procs. 4207 4208 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4209 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4210 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4211 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4212 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4213 matrix, and [DF] as another SeqAIJ matrix. 4214 4215 When d_nz, o_nz parameters are specified, d_nz storage elements are 4216 allocated for every row of the local diagonal submatrix, and o_nz 4217 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4218 One way to choose d_nz and o_nz is to use the max nonzeros per local 4219 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4220 In this case, the values of d_nz,o_nz are 4221 .vb 4222 proc0 : dnz = 2, o_nz = 2 4223 proc1 : dnz = 3, o_nz = 2 4224 proc2 : dnz = 1, o_nz = 4 4225 .ve 4226 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4227 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4228 for proc2. i.e we are using 12+15+10=37 storage locations to store 4229 34 values. 4230 4231 When d_nnz, o_nnz parameters are specified, the storage is specified 4232 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4233 In the above case the values for d_nnz,o_nnz are 4234 .vb 4235 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4236 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4237 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4238 .ve 4239 Here the space allocated is sum of all the above values i.e 34, and 4240 hence pre-allocation is perfect. 
4241 4242 Level: intermediate 4243 4244 .keywords: matrix, aij, compressed row, sparse, parallel 4245 4246 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4247 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4248 @*/ 4249 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4250 { 4251 PetscErrorCode ierr; 4252 PetscMPIInt size; 4253 4254 PetscFunctionBegin; 4255 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4256 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4257 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4258 if (size > 1) { 4259 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4260 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4261 } else { 4262 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4263 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4264 } 4265 PetscFunctionReturn(0); 4266 } 4267 4268 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4269 { 4270 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4271 PetscBool flg; 4272 PetscErrorCode ierr; 4273 4274 PetscFunctionBegin; 4275 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4276 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4277 if (Ad) *Ad = a->A; 4278 if (Ao) *Ao = a->B; 4279 if (colmap) *colmap = a->garray; 4280 PetscFunctionReturn(0); 4281 } 4282 4283 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4284 { 4285 PetscErrorCode ierr; 4286 PetscInt m,N,i,rstart,nnz,Ii; 4287 PetscInt *indx; 4288 PetscScalar *values; 4289 4290 PetscFunctionBegin; 4291 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4292 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4293 PetscInt *dnz,*onz,sum,bs,cbs; 4294 4295 if (n 
== PETSC_DECIDE) { 4296 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4297 } 4298 /* Check sum(n) = N */ 4299 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4300 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4301 4302 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4303 rstart -= m; 4304 4305 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4306 for (i=0; i<m; i++) { 4307 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4308 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4309 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4310 } 4311 4312 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4313 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4314 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4315 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4316 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4317 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4318 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4319 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4320 } 4321 4322 /* numeric phase */ 4323 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4324 for (i=0; i<m; i++) { 4325 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4326 Ii = i + rstart; 4327 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4328 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4329 } 4330 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4331 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4332 PetscFunctionReturn(0); 4333 } 4334 4335 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4336 { 4337 PetscErrorCode ierr; 4338 PetscMPIInt rank; 4339 PetscInt m,N,i,rstart,nnz; 4340 size_t len; 4341 const PetscInt 
*indx; 4342 PetscViewer out; 4343 char *name; 4344 Mat B; 4345 const PetscScalar *values; 4346 4347 PetscFunctionBegin; 4348 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4349 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4350 /* Should this be the type of the diagonal block of A? */ 4351 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4352 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4353 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4354 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4355 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4356 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4357 for (i=0; i<m; i++) { 4358 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4359 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4360 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4361 } 4362 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4363 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4364 4365 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4366 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4367 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4368 sprintf(name,"%s.%d",outfile,rank); 4369 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4370 ierr = PetscFree(name);CHKERRQ(ierr); 4371 ierr = MatView(B,out);CHKERRQ(ierr); 4372 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4373 ierr = MatDestroy(&B);CHKERRQ(ierr); 4374 PetscFunctionReturn(0); 4375 } 4376 4377 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4378 { 4379 PetscErrorCode ierr; 4380 Mat_Merge_SeqsToMPI *merge; 4381 PetscContainer container; 4382 4383 PetscFunctionBegin; 4384 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4385 if (container) { 4386 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4387 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4388 ierr = 
PetscFree(merge->len_s);CHKERRQ(ierr); 4389 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4390 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4391 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4392 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4393 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4394 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4395 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4396 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4397 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4398 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4399 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4400 ierr = PetscFree(merge);CHKERRQ(ierr); 4401 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4402 } 4403 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4404 PetscFunctionReturn(0); 4405 } 4406 4407 #include <../src/mat/utils/freespace.h> 4408 #include <petscbt.h> 4409 4410 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4411 { 4412 PetscErrorCode ierr; 4413 MPI_Comm comm; 4414 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4415 PetscMPIInt size,rank,taga,*len_s; 4416 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4417 PetscInt proc,m; 4418 PetscInt **buf_ri,**buf_rj; 4419 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4420 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4421 MPI_Request *s_waits,*r_waits; 4422 MPI_Status *status; 4423 MatScalar *aa=a->a; 4424 MatScalar **abuf_r,*ba_i; 4425 Mat_Merge_SeqsToMPI *merge; 4426 PetscContainer container; 4427 4428 PetscFunctionBegin; 4429 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4430 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4431 4432 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4433 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4434 4435 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4436 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4437 4438 bi = merge->bi; 4439 bj = merge->bj; 4440 buf_ri = merge->buf_ri; 4441 buf_rj = merge->buf_rj; 4442 4443 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4444 owners = merge->rowmap->range; 4445 len_s = merge->len_s; 4446 4447 /* send and recv matrix values */ 4448 /*-----------------------------*/ 4449 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4450 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4451 4452 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4453 for (proc=0,k=0; proc<size; proc++) { 4454 if (!len_s[proc]) continue; 4455 i = owners[proc]; 4456 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4457 k++; 4458 } 4459 4460 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4461 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4462 ierr = PetscFree(status);CHKERRQ(ierr); 4463 4464 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4465 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4466 4467 /* insert mat values of mpimat */ 4468 /*----------------------------*/ 4469 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4470 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4471 4472 for (k=0; k<merge->nrecv; k++) { 4473 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4474 nrows = *(buf_ri_k[k]); 4475 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4476 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4477 } 4478 4479 /* set values of ba */ 4480 m = merge->rowmap->n; 4481 for (i=0; i<m; i++) { 4482 arow = owners[rank] + i; 4483 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4484 bnzi = bi[i+1] - bi[i]; 4485 ierr = 
PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4486 4487 /* add local non-zero vals of this proc's seqmat into ba */ 4488 anzi = ai[arow+1] - ai[arow]; 4489 aj = a->j + ai[arow]; 4490 aa = a->a + ai[arow]; 4491 nextaj = 0; 4492 for (j=0; nextaj<anzi; j++) { 4493 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4494 ba_i[j] += aa[nextaj++]; 4495 } 4496 } 4497 4498 /* add received vals into ba */ 4499 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4500 /* i-th row */ 4501 if (i == *nextrow[k]) { 4502 anzi = *(nextai[k]+1) - *nextai[k]; 4503 aj = buf_rj[k] + *(nextai[k]); 4504 aa = abuf_r[k] + *(nextai[k]); 4505 nextaj = 0; 4506 for (j=0; nextaj<anzi; j++) { 4507 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4508 ba_i[j] += aa[nextaj++]; 4509 } 4510 } 4511 nextrow[k]++; nextai[k]++; 4512 } 4513 } 4514 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4515 } 4516 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4517 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4518 4519 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4520 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4521 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4522 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4523 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4524 PetscFunctionReturn(0); 4525 } 4526 4527 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4528 { 4529 PetscErrorCode ierr; 4530 Mat B_mpi; 4531 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4532 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4533 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4534 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4535 PetscInt len,proc,*dnz,*onz,bs,cbs; 4536 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4537 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4538 MPI_Request 
*si_waits,*sj_waits,*ri_waits,*rj_waits; 4539 MPI_Status *status; 4540 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4541 PetscBT lnkbt; 4542 Mat_Merge_SeqsToMPI *merge; 4543 PetscContainer container; 4544 4545 PetscFunctionBegin; 4546 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4547 4548 /* make sure it is a PETSc comm */ 4549 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4550 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4551 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4552 4553 ierr = PetscNew(&merge);CHKERRQ(ierr); 4554 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4555 4556 /* determine row ownership */ 4557 /*---------------------------------------------------------*/ 4558 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4559 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4560 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4561 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4562 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4563 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4564 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4565 4566 m = merge->rowmap->n; 4567 owners = merge->rowmap->range; 4568 4569 /* determine the number of messages to send, their lengths */ 4570 /*---------------------------------------------------------*/ 4571 len_s = merge->len_s; 4572 4573 len = 0; /* length of buf_si[] */ 4574 merge->nsend = 0; 4575 for (proc=0; proc<size; proc++) { 4576 len_si[proc] = 0; 4577 if (proc == rank) { 4578 len_s[proc] = 0; 4579 } else { 4580 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4581 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4582 } 4583 if (len_s[proc]) { 4584 merge->nsend++; 4585 nrows = 0; 4586 for (i=owners[proc]; i<owners[proc+1]; i++) { 4587 if (ai[i+1] > ai[i]) nrows++; 4588 } 4589 len_si[proc] = 2*(nrows+1); 4590 len += len_si[proc]; 4591 } 4592 } 4593 4594 
/* determine the number and length of messages to receive for ij-structure */ 4595 /*-------------------------------------------------------------------------*/ 4596 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4597 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4598 4599 /* post the Irecv of j-structure */ 4600 /*-------------------------------*/ 4601 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4602 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4603 4604 /* post the Isend of j-structure */ 4605 /*--------------------------------*/ 4606 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4607 4608 for (proc=0, k=0; proc<size; proc++) { 4609 if (!len_s[proc]) continue; 4610 i = owners[proc]; 4611 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4612 k++; 4613 } 4614 4615 /* receives and sends of j-structure are complete */ 4616 /*------------------------------------------------*/ 4617 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4618 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4619 4620 /* send and recv i-structure */ 4621 /*---------------------------*/ 4622 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4623 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4624 4625 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4626 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4627 for (proc=0,k=0; proc<size; proc++) { 4628 if (!len_s[proc]) continue; 4629 /* form outgoing message for i-structure: 4630 buf_si[0]: nrows to be sent 4631 [1:nrows]: row index (global) 4632 [nrows+1:2*nrows+1]: i-structure index 4633 */ 4634 /*-------------------------------------------*/ 
4635 nrows = len_si[proc]/2 - 1; 4636 buf_si_i = buf_si + nrows+1; 4637 buf_si[0] = nrows; 4638 buf_si_i[0] = 0; 4639 nrows = 0; 4640 for (i=owners[proc]; i<owners[proc+1]; i++) { 4641 anzi = ai[i+1] - ai[i]; 4642 if (anzi) { 4643 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4644 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4645 nrows++; 4646 } 4647 } 4648 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4649 k++; 4650 buf_si += len_si[proc]; 4651 } 4652 4653 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4654 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4655 4656 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4657 for (i=0; i<merge->nrecv; i++) { 4658 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4659 } 4660 4661 ierr = PetscFree(len_si);CHKERRQ(ierr); 4662 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4663 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4664 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4665 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4666 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4667 ierr = PetscFree(status);CHKERRQ(ierr); 4668 4669 /* compute a local seq matrix in each processor */ 4670 /*----------------------------------------------*/ 4671 /* allocate bi array and free space for accumulating nonzero column info */ 4672 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4673 bi[0] = 0; 4674 4675 /* create and initialize a linked list */ 4676 nlnk = N+1; 4677 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4678 4679 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4680 len = ai[owners[rank+1]] - ai[owners[rank]]; 4681 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4682 4683 current_space = free_space; 4684 4685 /* determine symbolic info for each 
local row */ 4686 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4687 4688 for (k=0; k<merge->nrecv; k++) { 4689 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4690 nrows = *buf_ri_k[k]; 4691 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4692 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4693 } 4694 4695 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4696 len = 0; 4697 for (i=0; i<m; i++) { 4698 bnzi = 0; 4699 /* add local non-zero cols of this proc's seqmat into lnk */ 4700 arow = owners[rank] + i; 4701 anzi = ai[arow+1] - ai[arow]; 4702 aj = a->j + ai[arow]; 4703 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4704 bnzi += nlnk; 4705 /* add received col data into lnk */ 4706 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4707 if (i == *nextrow[k]) { /* i-th row */ 4708 anzi = *(nextai[k]+1) - *nextai[k]; 4709 aj = buf_rj[k] + *nextai[k]; 4710 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4711 bnzi += nlnk; 4712 nextrow[k]++; nextai[k]++; 4713 } 4714 } 4715 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4716 4717 /* if free space is not available, make more free space */ 4718 if (current_space->local_remaining<bnzi) { 4719 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4720 nspacedouble++; 4721 } 4722 /* copy data into free space, then initialize lnk */ 4723 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4724 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4725 4726 current_space->array += bnzi; 4727 current_space->local_used += bnzi; 4728 current_space->local_remaining -= bnzi; 4729 4730 bi[i+1] = bi[i] + bnzi; 4731 } 4732 4733 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4734 4735 ierr = 
PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4736 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4737 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4738 4739 /* create symbolic parallel matrix B_mpi */ 4740 /*---------------------------------------*/ 4741 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4742 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4743 if (n==PETSC_DECIDE) { 4744 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4745 } else { 4746 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4747 } 4748 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4749 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4750 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4751 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4752 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4753 4754 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4755 B_mpi->assembled = PETSC_FALSE; 4756 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4757 merge->bi = bi; 4758 merge->bj = bj; 4759 merge->buf_ri = buf_ri; 4760 merge->buf_rj = buf_rj; 4761 merge->coi = NULL; 4762 merge->coj = NULL; 4763 merge->owners_co = NULL; 4764 4765 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4766 4767 /* attach the supporting struct to B_mpi for reuse */ 4768 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4769 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4770 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4771 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4772 *mpimat = B_mpi; 4773 4774 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4775 PetscFunctionReturn(0); 4776 } 4777 4778 /*@C 4779 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4780 matrices from each processor 4781 4782 
Collective on MPI_Comm 4783 4784 Input Parameters: 4785 + comm - the communicators the parallel matrix will live on 4786 . seqmat - the input sequential matrices 4787 . m - number of local rows (or PETSC_DECIDE) 4788 . n - number of local columns (or PETSC_DECIDE) 4789 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4790 4791 Output Parameter: 4792 . mpimat - the parallel matrix generated 4793 4794 Level: advanced 4795 4796 Notes: 4797 The dimensions of the sequential matrix in each processor MUST be the same. 4798 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4799 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4800 @*/ 4801 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4802 { 4803 PetscErrorCode ierr; 4804 PetscMPIInt size; 4805 4806 PetscFunctionBegin; 4807 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4808 if (size == 1) { 4809 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4810 if (scall == MAT_INITIAL_MATRIX) { 4811 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4812 } else { 4813 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4814 } 4815 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4816 PetscFunctionReturn(0); 4817 } 4818 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4819 if (scall == MAT_INITIAL_MATRIX) { 4820 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4821 } 4822 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4823 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4824 PetscFunctionReturn(0); 4825 } 4826 4827 /*@ 4828 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4829 mlocal rows and n columns. 
Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4830 with MatGetSize() 4831 4832 Not Collective 4833 4834 Input Parameters: 4835 + A - the matrix 4836 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4837 4838 Output Parameter: 4839 . A_loc - the local sequential matrix generated 4840 4841 Level: developer 4842 4843 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4844 4845 @*/ 4846 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4847 { 4848 PetscErrorCode ierr; 4849 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4850 Mat_SeqAIJ *mat,*a,*b; 4851 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4852 MatScalar *aa,*ba,*cam; 4853 PetscScalar *ca; 4854 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4855 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4856 PetscBool match; 4857 MPI_Comm comm; 4858 PetscMPIInt size; 4859 4860 PetscFunctionBegin; 4861 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4862 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4863 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4864 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4865 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4866 4867 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4868 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4869 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4870 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4871 aa = a->a; ba = b->a; 4872 if (scall == MAT_INITIAL_MATRIX) { 4873 if (size == 1) { 4874 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4875 PetscFunctionReturn(0); 4876 } 4877 4878 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4879 ci[0] = 0; 4880 for (i=0; i<am; i++) { 4881 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4882 } 4883 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4884 ierr = 
PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4885 k = 0; 4886 for (i=0; i<am; i++) { 4887 ncols_o = bi[i+1] - bi[i]; 4888 ncols_d = ai[i+1] - ai[i]; 4889 /* off-diagonal portion of A */ 4890 for (jo=0; jo<ncols_o; jo++) { 4891 col = cmap[*bj]; 4892 if (col >= cstart) break; 4893 cj[k] = col; bj++; 4894 ca[k++] = *ba++; 4895 } 4896 /* diagonal portion of A */ 4897 for (j=0; j<ncols_d; j++) { 4898 cj[k] = cstart + *aj++; 4899 ca[k++] = *aa++; 4900 } 4901 /* off-diagonal portion of A */ 4902 for (j=jo; j<ncols_o; j++) { 4903 cj[k] = cmap[*bj++]; 4904 ca[k++] = *ba++; 4905 } 4906 } 4907 /* put together the new matrix */ 4908 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4909 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4910 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4911 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4912 mat->free_a = PETSC_TRUE; 4913 mat->free_ij = PETSC_TRUE; 4914 mat->nonew = 0; 4915 } else if (scall == MAT_REUSE_MATRIX) { 4916 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4917 ci = mat->i; cj = mat->j; cam = mat->a; 4918 for (i=0; i<am; i++) { 4919 /* off-diagonal portion of A */ 4920 ncols_o = bi[i+1] - bi[i]; 4921 for (jo=0; jo<ncols_o; jo++) { 4922 col = cmap[*bj]; 4923 if (col >= cstart) break; 4924 *cam++ = *ba++; bj++; 4925 } 4926 /* diagonal portion of A */ 4927 ncols_d = ai[i+1] - ai[i]; 4928 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4929 /* off-diagonal portion of A */ 4930 for (j=jo; j<ncols_o; j++) { 4931 *cam++ = *ba++; bj++; 4932 } 4933 } 4934 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4935 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4936 PetscFunctionReturn(0); 4937 } 4938 4939 /*@C 4940 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4941 4942 Not Collective 4943 4944 Input Parameters: 4945 + A - 
the matrix 4946 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4947 - row, col - index sets of rows and columns to extract (or NULL) 4948 4949 Output Parameter: 4950 . A_loc - the local sequential matrix generated 4951 4952 Level: developer 4953 4954 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4955 4956 @*/ 4957 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4958 { 4959 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4960 PetscErrorCode ierr; 4961 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4962 IS isrowa,iscola; 4963 Mat *aloc; 4964 PetscBool match; 4965 4966 PetscFunctionBegin; 4967 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4968 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4969 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4970 if (!row) { 4971 start = A->rmap->rstart; end = A->rmap->rend; 4972 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4973 } else { 4974 isrowa = *row; 4975 } 4976 if (!col) { 4977 start = A->cmap->rstart; 4978 cmap = a->garray; 4979 nzA = a->A->cmap->n; 4980 nzB = a->B->cmap->n; 4981 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4982 ncols = 0; 4983 for (i=0; i<nzB; i++) { 4984 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4985 else break; 4986 } 4987 imark = i; 4988 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4989 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4990 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4991 } else { 4992 iscola = *col; 4993 } 4994 if (scall != MAT_INITIAL_MATRIX) { 4995 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4996 aloc[0] = *A_loc; 4997 } 4998 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4999 *A_loc = aloc[0]; 5000 ierr = PetscFree(aloc);CHKERRQ(ierr); 5001 if (!row) { 5002 ierr = 
ISDestroy(&isrowa);CHKERRQ(ierr); 5003 } 5004 if (!col) { 5005 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5006 } 5007 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5008 PetscFunctionReturn(0); 5009 } 5010 5011 /*@C 5012 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5013 5014 Collective on Mat 5015 5016 Input Parameters: 5017 + A,B - the matrices in mpiaij format 5018 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5019 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5020 5021 Output Parameter: 5022 + rowb, colb - index sets of rows and columns of B to extract 5023 - B_seq - the sequential matrix generated 5024 5025 Level: developer 5026 5027 @*/ 5028 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5029 { 5030 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5031 PetscErrorCode ierr; 5032 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5033 IS isrowb,iscolb; 5034 Mat *bseq=NULL; 5035 5036 PetscFunctionBegin; 5037 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5038 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5039 } 5040 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5041 5042 if (scall == MAT_INITIAL_MATRIX) { 5043 start = A->cmap->rstart; 5044 cmap = a->garray; 5045 nzA = a->A->cmap->n; 5046 nzB = a->B->cmap->n; 5047 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5048 ncols = 0; 5049 for (i=0; i<nzB; i++) { /* row < local row index */ 5050 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5051 else break; 5052 } 5053 imark = i; 5054 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5055 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5056 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5057 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5058 } else { 5059 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5060 isrowb = *rowb; iscolb = *colb; 5061 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5062 bseq[0] = *B_seq; 5063 } 5064 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5065 *B_seq = bseq[0]; 5066 ierr = PetscFree(bseq);CHKERRQ(ierr); 5067 if (!rowb) { 5068 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5069 } else { 5070 *rowb = isrowb; 5071 } 5072 if (!colb) { 5073 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5074 } else { 5075 *colb = iscolb; 5076 } 5077 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5078 PetscFunctionReturn(0); 5079 } 5080 5081 /* 5082 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5083 of the OFF-DIAGONAL portion of local A 5084 5085 Collective on Mat 5086 5087 Input Parameters: 5088 + A,B - the matrices in mpiaij format 5089 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5090 5091 Output Parameter: 5092 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5093 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5094 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5095 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5096 5097 Level: developer 5098 5099 */ 5100 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5101 { 5102 VecScatter_MPI_General *gen_to,*gen_from; 5103 PetscErrorCode ierr; 5104 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5105 Mat_SeqAIJ *b_oth; 5106 VecScatter ctx =a->Mvctx; 5107 MPI_Comm comm; 5108 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5109 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5110 PetscInt *rvalues,*svalues; 5111 MatScalar *b_otha,*bufa,*bufA; 5112 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5113 MPI_Request *rwaits = NULL,*swaits = NULL; 5114 MPI_Status *sstatus,rstatus; 5115 PetscMPIInt jj,size; 5116 PetscInt *cols,sbs,rbs; 5117 PetscScalar *vals; 5118 5119 PetscFunctionBegin; 5120 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5121 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5122 5123 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5124 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5125 } 5126 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5127 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5128 5129 if (size == 1) { 5130 startsj_s = NULL; 5131 bufa_ptr = NULL; 5132 *B_oth = NULL; 5133 PetscFunctionReturn(0); 5134 } 5135 5136 gen_to = (VecScatter_MPI_General*)ctx->todata; 5137 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5138 nrecvs = gen_from->n; 5139 nsends = gen_to->n; 5140 5141 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5142 srow = gen_to->indices; /* local row index to be sent */ 
5143 sstarts = gen_to->starts; 5144 sprocs = gen_to->procs; 5145 sstatus = gen_to->sstatus; 5146 sbs = gen_to->bs; 5147 rstarts = gen_from->starts; 5148 rprocs = gen_from->procs; 5149 rbs = gen_from->bs; 5150 5151 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5152 if (scall == MAT_INITIAL_MATRIX) { 5153 /* i-array */ 5154 /*---------*/ 5155 /* post receives */ 5156 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5157 for (i=0; i<nrecvs; i++) { 5158 rowlen = rvalues + rstarts[i]*rbs; 5159 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5160 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5161 } 5162 5163 /* pack the outgoing message */ 5164 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5165 5166 sstartsj[0] = 0; 5167 rstartsj[0] = 0; 5168 len = 0; /* total length of j or a array to be sent */ 5169 k = 0; 5170 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5171 for (i=0; i<nsends; i++) { 5172 rowlen = svalues + sstarts[i]*sbs; 5173 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5174 for (j=0; j<nrows; j++) { 5175 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5176 for (l=0; l<sbs; l++) { 5177 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5178 5179 rowlen[j*sbs+l] = ncols; 5180 5181 len += ncols; 5182 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5183 } 5184 k++; 5185 } 5186 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5187 5188 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5189 } 5190 /* recvs and sends of i-array are completed */ 5191 i = nrecvs; 5192 while (i--) { 5193 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5194 } 5195 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5196 ierr = 
PetscFree(svalues);CHKERRQ(ierr); 5197 5198 /* allocate buffers for sending j and a arrays */ 5199 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5200 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5201 5202 /* create i-array of B_oth */ 5203 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5204 5205 b_othi[0] = 0; 5206 len = 0; /* total length of j or a array to be received */ 5207 k = 0; 5208 for (i=0; i<nrecvs; i++) { 5209 rowlen = rvalues + rstarts[i]*rbs; 5210 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5211 for (j=0; j<nrows; j++) { 5212 b_othi[k+1] = b_othi[k] + rowlen[j]; 5213 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5214 k++; 5215 } 5216 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5217 } 5218 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5219 5220 /* allocate space for j and a arrrays of B_oth */ 5221 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5222 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5223 5224 /* j-array */ 5225 /*---------*/ 5226 /* post receives of j-array */ 5227 for (i=0; i<nrecvs; i++) { 5228 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5229 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5230 } 5231 5232 /* pack the outgoing message j-array */ 5233 k = 0; 5234 for (i=0; i<nsends; i++) { 5235 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5236 bufJ = bufj+sstartsj[i]; 5237 for (j=0; j<nrows; j++) { 5238 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5239 for (ll=0; ll<sbs; ll++) { 5240 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5241 for (l=0; l<ncols; l++) { 5242 *bufJ++ = cols[l]; 5243 } 5244 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5245 } 5246 } 5247 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5248 } 5249 5250 /* 
recvs and sends of j-array are completed */ 5251 i = nrecvs; 5252 while (i--) { 5253 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5254 } 5255 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5256 } else if (scall == MAT_REUSE_MATRIX) { 5257 sstartsj = *startsj_s; 5258 rstartsj = *startsj_r; 5259 bufa = *bufa_ptr; 5260 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5261 b_otha = b_oth->a; 5262 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5263 5264 /* a-array */ 5265 /*---------*/ 5266 /* post receives of a-array */ 5267 for (i=0; i<nrecvs; i++) { 5268 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5269 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5270 } 5271 5272 /* pack the outgoing message a-array */ 5273 k = 0; 5274 for (i=0; i<nsends; i++) { 5275 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5276 bufA = bufa+sstartsj[i]; 5277 for (j=0; j<nrows; j++) { 5278 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5279 for (ll=0; ll<sbs; ll++) { 5280 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5281 for (l=0; l<ncols; l++) { 5282 *bufA++ = vals[l]; 5283 } 5284 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5285 } 5286 } 5287 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5288 } 5289 /* recvs and sends of a-array are completed */ 5290 i = nrecvs; 5291 while (i--) { 5292 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5293 } 5294 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5295 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5296 5297 if (scall == MAT_INITIAL_MATRIX) { 5298 /* put together the new matrix */ 5299 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5300 5301 /* 
MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5302 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5303 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5304 b_oth->free_a = PETSC_TRUE; 5305 b_oth->free_ij = PETSC_TRUE; 5306 b_oth->nonew = 0; 5307 5308 ierr = PetscFree(bufj);CHKERRQ(ierr); 5309 if (!startsj_s || !bufa_ptr) { 5310 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5311 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5312 } else { 5313 *startsj_s = sstartsj; 5314 *startsj_r = rstartsj; 5315 *bufa_ptr = bufa; 5316 } 5317 } 5318 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5319 PetscFunctionReturn(0); 5320 } 5321 5322 /*@C 5323 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5324 5325 Not Collective 5326 5327 Input Parameters: 5328 . A - The matrix in mpiaij format 5329 5330 Output Parameter: 5331 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5332 . 
colmap - A map from global column index to local index into lvec 5333 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5334 5335 Level: developer 5336 5337 @*/ 5338 #if defined(PETSC_USE_CTABLE) 5339 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5340 #else 5341 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5342 #endif 5343 { 5344 Mat_MPIAIJ *a; 5345 5346 PetscFunctionBegin; 5347 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5348 PetscValidPointer(lvec, 2); 5349 PetscValidPointer(colmap, 3); 5350 PetscValidPointer(multScatter, 4); 5351 a = (Mat_MPIAIJ*) A->data; 5352 if (lvec) *lvec = a->lvec; 5353 if (colmap) *colmap = a->colmap; 5354 if (multScatter) *multScatter = a->Mvctx; 5355 PetscFunctionReturn(0); 5356 } 5357 5358 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5359 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5360 #if defined(PETSC_HAVE_MKL) 5361 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5362 #endif 5363 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5364 #if defined(PETSC_HAVE_ELEMENTAL) 5365 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5366 #endif 5367 #if defined(PETSC_HAVE_HYPRE) 5368 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5369 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5370 #endif 5371 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*); 5372 5373 /* 5374 Computes (B'*A')' since computing B*A directly is untenable 5375 5376 n p p 5377 ( ) ( ) ( ) 5378 m ( A ) * n ( B ) = m ( C ) 5379 ( ) ( ) ( ) 5380 5381 */ 5382 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5383 { 5384 
PetscErrorCode ierr; 5385 Mat At,Bt,Ct; 5386 5387 PetscFunctionBegin; 5388 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5389 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5390 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5391 ierr = MatDestroy(&At);CHKERRQ(ierr); 5392 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5393 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5394 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5395 PetscFunctionReturn(0); 5396 } 5397 5398 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5399 { 5400 PetscErrorCode ierr; 5401 PetscInt m=A->rmap->n,n=B->cmap->n; 5402 Mat Cmat; 5403 5404 PetscFunctionBegin; 5405 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5406 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5407 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5408 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5409 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5410 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5411 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5412 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5413 5414 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5415 5416 *C = Cmat; 5417 PetscFunctionReturn(0); 5418 } 5419 5420 /* ----------------------------------------------------------------*/ 5421 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5422 { 5423 PetscErrorCode ierr; 5424 5425 PetscFunctionBegin; 5426 if (scall == MAT_INITIAL_MATRIX) { 5427 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5428 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5429 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5430 } 5431 ierr = 
PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5432 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5433 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5434 PetscFunctionReturn(0); 5435 } 5436 5437 /*MC 5438 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5439 5440 Options Database Keys: 5441 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5442 5443 Level: beginner 5444 5445 .seealso: MatCreateAIJ() 5446 M*/ 5447 5448 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5449 { 5450 Mat_MPIAIJ *b; 5451 PetscErrorCode ierr; 5452 PetscMPIInt size; 5453 5454 PetscFunctionBegin; 5455 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5456 5457 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5458 B->data = (void*)b; 5459 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5460 B->assembled = PETSC_FALSE; 5461 B->insertmode = NOT_SET_VALUES; 5462 b->size = size; 5463 5464 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5465 5466 /* build cache for off array entries formed */ 5467 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5468 5469 b->donotstash = PETSC_FALSE; 5470 b->colmap = 0; 5471 b->garray = 0; 5472 b->roworiented = PETSC_TRUE; 5473 5474 /* stuff used for matrix vector multiply */ 5475 b->lvec = NULL; 5476 b->Mvctx = NULL; 5477 5478 /* stuff for MatGetRow() */ 5479 b->rowindices = 0; 5480 b->rowvalues = 0; 5481 b->getrowactive = PETSC_FALSE; 5482 5483 /* flexible pointer used in CUSP/CUSPARSE classes */ 5484 b->spptr = NULL; 5485 5486 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5487 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5488 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5489 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5490 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5491 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5492 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5493 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5494 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5495 #if defined(PETSC_HAVE_MKL) 5496 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5497 #endif 5498 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5499 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5500 #if defined(PETSC_HAVE_ELEMENTAL) 5501 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5502 #endif 5503 #if defined(PETSC_HAVE_HYPRE) 5504 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5505 #endif 5506 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr); 5507 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5508 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5509 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5510 #if defined(PETSC_HAVE_HYPRE) 5511 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5512 #endif 5513 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5514 PetscFunctionReturn(0); 5515 } 5516 5517 /*@C 5518 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5519 and "off-diagonal" part of the matrix in CSR format. 5520 5521 Collective on MPI_Comm 5522 5523 Input Parameters: 5524 + comm - MPI communicator 5525 . m - number of local rows (Cannot be PETSC_DECIDE) 5526 . n - This value should be the same as the local size used in creating the 5527 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5528 calculated if N is given) For square matrices n is almost always m. 5529 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5530 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5531 . i - row indices for "diagonal" portion of matrix 5532 . j - column indices 5533 . a - matrix values 5534 . oi - row indices for "off-diagonal" portion of matrix 5535 . oj - column indices 5536 - oa - matrix values 5537 5538 Output Parameter: 5539 . mat - the matrix 5540 5541 Level: advanced 5542 5543 Notes: 5544 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5545 must free the arrays once the matrix has been destroyed and not before. 
5546 5547 The i and j indices are 0 based 5548 5549 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5550 5551 This sets local rows and cannot be used to set off-processor values. 5552 5553 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5554 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5555 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5556 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5557 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5558 communication if it is known that only local entries will be set. 5559 5560 .keywords: matrix, aij, compressed row, sparse, parallel 5561 5562 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5563 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5564 @*/ 5565 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5566 { 5567 PetscErrorCode ierr; 5568 Mat_MPIAIJ *maij; 5569 5570 PetscFunctionBegin; 5571 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5572 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5573 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5574 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5575 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5576 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5577 maij = (Mat_MPIAIJ*) (*mat)->data; 5578 5579 (*mat)->preallocated = 
PETSC_TRUE; 5580 5581 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5582 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5583 5584 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5585 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5586 5587 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5588 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5589 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5590 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5591 5592 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5593 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5594 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5595 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5596 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5597 PetscFunctionReturn(0); 5598 } 5599 5600 /* 5601 Special version for direct calls from Fortran 5602 */ 5603 #include <petsc/private/fortranimpl.h> 5604 5605 /* Change these macros so can be used in void function */ 5606 #undef CHKERRQ 5607 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5608 #undef SETERRQ2 5609 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5610 #undef SETERRQ3 5611 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5612 #undef SETERRQ 5613 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5614 5615 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5616 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5617 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5618 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5619 #else 5620 #endif 5621 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5622 { 5623 Mat mat = 
*mmat; 5624 PetscInt m = *mm, n = *mn; 5625 InsertMode addv = *maddv; 5626 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5627 PetscScalar value; 5628 PetscErrorCode ierr; 5629 5630 MatCheckPreallocated(mat,1); 5631 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5632 5633 #if defined(PETSC_USE_DEBUG) 5634 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5635 #endif 5636 { 5637 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5638 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5639 PetscBool roworiented = aij->roworiented; 5640 5641 /* Some Variables required in the macro */ 5642 Mat A = aij->A; 5643 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5644 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5645 MatScalar *aa = a->a; 5646 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5647 Mat B = aij->B; 5648 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5649 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5650 MatScalar *ba = b->a; 5651 5652 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5653 PetscInt nonew = a->nonew; 5654 MatScalar *ap1,*ap2; 5655 5656 PetscFunctionBegin; 5657 for (i=0; i<m; i++) { 5658 if (im[i] < 0) continue; 5659 #if defined(PETSC_USE_DEBUG) 5660 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5661 #endif 5662 if (im[i] >= rstart && im[i] < rend) { 5663 row = im[i] - rstart; 5664 lastcol1 = -1; 5665 rp1 = aj + ai[row]; 5666 ap1 = aa + ai[row]; 5667 rmax1 = aimax[row]; 5668 nrow1 = ailen[row]; 5669 low1 = 0; 5670 high1 = nrow1; 5671 lastcol2 = -1; 5672 rp2 = bj + bi[row]; 5673 ap2 = ba + bi[row]; 5674 rmax2 = bimax[row]; 5675 nrow2 = bilen[row]; 5676 low2 = 0; 5677 high2 = nrow2; 5678 
5679 for (j=0; j<n; j++) { 5680 if (roworiented) value = v[i*n+j]; 5681 else value = v[i+j*m]; 5682 if (in[j] >= cstart && in[j] < cend) { 5683 col = in[j] - cstart; 5684 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5685 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5686 } else if (in[j] < 0) continue; 5687 #if defined(PETSC_USE_DEBUG) 5688 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5689 #endif 5690 else { 5691 if (mat->was_assembled) { 5692 if (!aij->colmap) { 5693 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5694 } 5695 #if defined(PETSC_USE_CTABLE) 5696 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5697 col--; 5698 #else 5699 col = aij->colmap[in[j]] - 1; 5700 #endif 5701 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5702 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5703 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5704 col = in[j]; 5705 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5706 B = aij->B; 5707 b = (Mat_SeqAIJ*)B->data; 5708 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5709 rp2 = bj + bi[row]; 5710 ap2 = ba + bi[row]; 5711 rmax2 = bimax[row]; 5712 nrow2 = bilen[row]; 5713 low2 = 0; 5714 high2 = nrow2; 5715 bm = aij->B->rmap->n; 5716 ba = b->a; 5717 } 5718 } else col = in[j]; 5719 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5720 } 5721 } 5722 } else if (!aij->donotstash) { 5723 if (roworiented) { 5724 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5725 } else { 5726 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5727 } 5728 } 5729 } 5730 } 5731 PetscFunctionReturnVoid(); 5732 } 5733 5734