#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
  automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatSetLateBlockSizes_MPIAIJ"
PetscErrorCode MatSetLateBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (rbs && !cbs) {
    ierr = MatSetBlockSize(mat->A,rbs);CHKERRQ(ierr);
    ierr = MatSetBlockSize(mat->B,rbs);CHKERRQ(ierr);
  } else if (rbs && cbs) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSize(mat->B,rbs);CHKERRQ(ierr);
  } else SETERRQ2(PetscObjectComm((PetscObject)M),PETSC_ERR_ARG_WRONG,"Cannot set late block sizes %D %D",rbs,cbs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr =
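/*
   A minimal usage sketch of the preallocation advice in the MATAIJ manual page above; the
   communicator comm, the local sizes m and n, and the per-row counts 5 and 2 are placeholders,
   not values taken from this file:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);          used when the communicator has one process
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   used when it has several processes
*/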
PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 101 cnt = 0; 102 for (i=0; i<m; i++) { 103 na = ia[i+1] - ia[i]; 104 nb = ib[i+1] - ib[i]; 105 if (!na && !nb) continue; 106 aa = a->a + ia[i]; 107 for (j=0; j<na;j++) { 108 if (aa[j] != 0.0) { 109 rows[cnt++] = rstart + i; 110 goto ok2; 111 } 112 } 113 bb = b->a + ib[i]; 114 for (j=0; j<nb; j++) { 115 if (bb[j] != 0.0) { 116 rows[cnt++] = rstart + i; 117 goto ok2; 118 } 119 } 120 ok2:; 121 } 122 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 123 PetscFunctionReturn(0); 124 } 125 126 #undef __FUNCT__ 127 #define __FUNCT__ "MatDiagonalSet_MPIAIJ" 128 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 129 { 130 PetscErrorCode ierr; 131 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 132 133 PetscFunctionBegin; 134 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 135 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 136 } else { 137 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 138 } 139 PetscFunctionReturn(0); 140 } 141 142 143 #undef __FUNCT__ 144 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 145 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 146 { 147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 148 PetscErrorCode ierr; 149 PetscInt i,rstart,nrows,*rows; 150 151 PetscFunctionBegin; 152 *zrows = NULL; 153 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 154 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 155 for (i=0; i<nrows; i++) rows[i] += rstart; 156 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 157 PetscFunctionReturn(0); 158 } 159 160 #undef __FUNCT__ 161 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 162 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 163 { 164 PetscErrorCode ierr; 165 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 166 PetscInt i,n,*garray = aij->garray; 167 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 168 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 169 PetscReal *work; 170 171 PetscFunctionBegin; 172 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 173 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 174 if (type == NORM_2) { 175 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 176 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 177 } 178 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 179 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 180 } 181 } else if (type == NORM_1) { 182 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 183 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 184 } 185 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 186 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 187 } 188 } else if (type == NORM_INFINITY) { 189 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 190 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 191 } 192 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 193 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 194 } 195 196 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 197 if (type == NORM_INFINITY) { 198 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 199 } else { 200 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 201 } 
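/*
   A hedged usage sketch for the column-norm kernel being assembled here: every process receives
   the full result because of the MPIU_Allreduce above, so the output array must hold one entry
   per global column (A is a placeholder matrix name):

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/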
202 ierr = PetscFree(work);CHKERRQ(ierr); 203 if (type == NORM_2) { 204 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 205 } 206 PetscFunctionReturn(0); 207 } 208 209 #undef __FUNCT__ 210 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ" 211 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 212 { 213 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 214 IS sis,gis; 215 PetscErrorCode ierr; 216 const PetscInt *isis,*igis; 217 PetscInt n,*iis,nsis,ngis,rstart,i; 218 219 PetscFunctionBegin; 220 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 221 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 222 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 223 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 224 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 225 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 226 227 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 228 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 229 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 230 n = ngis + nsis; 231 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 232 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 233 for (i=0; i<n; i++) iis[i] += rstart; 234 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 235 236 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 237 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 238 ierr = ISDestroy(&sis);CHKERRQ(ierr); 239 ierr = ISDestroy(&gis);CHKERRQ(ierr); 240 PetscFunctionReturn(0); 241 } 242 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatDistribute_MPIAIJ" 245 /* 246 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 247 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
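
    A hedged calling sketch (gseq is assumed to be a MATSEQAIJ matrix held on rank 0 and mlocal
    the number of rows wanted on this process; neither name comes from this file):

      Mat dist;
      ierr = MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
      ...
      ierr = MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);

    With MAT_REUSE_MATRIX only the numerical values are moved again from rank 0.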
248 249 Only for square matrices 250 251 Used by a preconditioner, hence PETSC_EXTERN 252 */ 253 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 254 { 255 PetscMPIInt rank,size; 256 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 257 PetscErrorCode ierr; 258 Mat mat; 259 Mat_SeqAIJ *gmata; 260 PetscMPIInt tag; 261 MPI_Status status; 262 PetscBool aij; 263 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 264 265 PetscFunctionBegin; 266 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 267 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 268 if (!rank) { 269 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 270 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 271 } 272 if (reuse == MAT_INITIAL_MATRIX) { 273 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 274 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 275 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 276 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 277 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 278 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 279 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 280 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 281 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 282 283 rowners[0] = 0; 284 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 285 rstart = rowners[rank]; 286 rend = rowners[rank+1]; 287 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 288 if (!rank) { 289 gmata = (Mat_SeqAIJ*) gmat->data; 290 /* send row lengths to all processors */ 291 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 292 for (i=1; i<size; i++) { 293 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 294 } 295 /* determine number diagonal and off-diagonal counts */ 296 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 297 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 298 jj = 0; 299 for (i=0; i<m; i++) { 300 for (j=0; j<dlens[i]; j++) { 301 if (gmata->j[jj] < rstart) ld[i]++; 302 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 303 jj++; 304 } 305 } 306 /* send column indices to other processes */ 307 for (i=1; i<size; i++) { 308 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 309 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 310 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 311 } 312 313 /* send numerical values to other processes */ 314 for (i=1; i<size; i++) { 315 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 316 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 317 } 318 gmataa = gmata->a; 319 gmataj = gmata->j; 320 321 } else { 322 /* receive row lengths */ 323 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 324 /* receive column indices */ 325 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 326 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 327 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 328 /* determine number diagonal and off-diagonal counts */ 329 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 330 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 331 jj = 0; 332 for (i=0; i<m; i++) { 333 for (j=0; j<dlens[i]; j++) { 334 if 
(gmataj[jj] < rstart) ld[i]++; 335 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 336 jj++; 337 } 338 } 339 /* receive numerical values */ 340 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 341 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 342 } 343 /* set preallocation */ 344 for (i=0; i<m; i++) { 345 dlens[i] -= olens[i]; 346 } 347 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 348 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 349 350 for (i=0; i<m; i++) { 351 dlens[i] += olens[i]; 352 } 353 cnt = 0; 354 for (i=0; i<m; i++) { 355 row = rstart + i; 356 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 357 cnt += dlens[i]; 358 } 359 if (rank) { 360 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 361 } 362 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 363 ierr = PetscFree(rowners);CHKERRQ(ierr); 364 365 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 366 367 *inmat = mat; 368 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 369 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 370 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 371 mat = *inmat; 372 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 373 if (!rank) { 374 /* send numerical values to other processes */ 375 gmata = (Mat_SeqAIJ*) gmat->data; 376 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 377 gmataa = gmata->a; 378 for (i=1; i<size; i++) { 379 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 380 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 381 } 382 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 383 } else { 384 /* receive numerical values from process 0*/ 385 nz = Ad->nz + Ao->nz; 386 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 387 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 388 } 389 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 390 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 391 ad = Ad->a; 392 ao = Ao->a; 393 if (mat->rmap->n) { 394 i = 0; 395 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 396 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 397 } 398 for (i=1; i<mat->rmap->n; i++) { 399 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 400 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 401 } 402 i--; 403 if (mat->rmap->n) { 404 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 405 } 406 if (rank) { 407 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 408 } 409 } 410 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 411 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 412 PetscFunctionReturn(0); 413 } 414 415 /* 416 Local utility routine that creates a mapping from the global column 417 number to the local number in the off-diagonal part of the local 418 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 419 a slightly higher hash table cost; without it it is not scalable (each processor 420 has an order N integer array but is fast to acess. 
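
    For example, if the off-diagonal block of a process references the global columns
    garray = {3,7,12}, the map stores colmap[3] = 1, colmap[7] = 2, colmap[12] = 3; the stored
    values are shifted up by one so that a lookup returning 0 means "this global column does not
    appear in the off-diagonal part on this process".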
421 */ 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 424 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 425 { 426 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 427 PetscErrorCode ierr; 428 PetscInt n = aij->B->cmap->n,i; 429 430 PetscFunctionBegin; 431 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 432 #if defined(PETSC_USE_CTABLE) 433 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 434 for (i=0; i<n; i++) { 435 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 436 } 437 #else 438 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 439 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 440 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 441 #endif 442 PetscFunctionReturn(0); 443 } 444 445 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 446 { \ 447 if (col <= lastcol1) low1 = 0; \ 448 else high1 = nrow1; \ 449 lastcol1 = col;\ 450 while (high1-low1 > 5) { \ 451 t = (low1+high1)/2; \ 452 if (rp1[t] > col) high1 = t; \ 453 else low1 = t; \ 454 } \ 455 for (_i=low1; _i<high1; _i++) { \ 456 if (rp1[_i] > col) break; \ 457 if (rp1[_i] == col) { \ 458 if (addv == ADD_VALUES) ap1[_i] += value; \ 459 else ap1[_i] = value; \ 460 goto a_noinsert; \ 461 } \ 462 } \ 463 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 464 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 465 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 466 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 467 N = nrow1++ - 1; a->nz++; high1++; \ 468 /* shift up all the later entries in this row */ \ 469 for (ii=N; ii>=_i; ii--) { \ 470 rp1[ii+1] = rp1[ii]; \ 471 ap1[ii+1] = ap1[ii]; \ 472 } \ 473 rp1[_i] = col; \ 474 ap1[_i] = value; \ 475 A->nonzerostate++;\ 476 a_noinsert: ; \ 477 ailen[row] = nrow1; \ 478 } 479 480 481 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 482 { \ 483 if (col <= lastcol2) low2 = 0; \ 484 else high2 = nrow2; \ 485 lastcol2 = col; \ 486 while (high2-low2 > 5) { \ 487 t = (low2+high2)/2; \ 488 if (rp2[t] > col) high2 = t; \ 489 else low2 = t; \ 490 } \ 491 for (_i=low2; _i<high2; _i++) { \ 492 if (rp2[_i] > col) break; \ 493 if (rp2[_i] == col) { \ 494 if (addv == ADD_VALUES) ap2[_i] += value; \ 495 else ap2[_i] = value; \ 496 goto b_noinsert; \ 497 } \ 498 } \ 499 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 500 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 501 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 502 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 503 N = nrow2++ - 1; b->nz++; high2++; \ 504 /* shift up all the later entries in this row */ \ 505 for (ii=N; ii>=_i; ii--) { \ 506 rp2[ii+1] = rp2[ii]; \ 507 ap2[ii+1] = ap2[ii]; \ 508 } \ 509 rp2[_i] = col; \ 510 ap2[_i] = value; \ 511 B->nonzerostate++; \ 512 b_noinsert: ; \ 513 bilen[row] = nrow2; \ 514 } 515 516 #undef __FUNCT__ 517 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 518 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 519 { 520 Mat_MPIAIJ *mat = 
(Mat_MPIAIJ*)A->data; 521 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 522 PetscErrorCode ierr; 523 PetscInt l,*garray = mat->garray,diag; 524 525 PetscFunctionBegin; 526 /* code only works for square matrices A */ 527 528 /* find size of row to the left of the diagonal part */ 529 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 530 row = row - diag; 531 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 532 if (garray[b->j[b->i[row]+l]] > diag) break; 533 } 534 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 535 536 /* diagonal part */ 537 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 538 539 /* right of diagonal part */ 540 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 541 PetscFunctionReturn(0); 542 } 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatSetValues_MPIAIJ" 546 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 547 { 548 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 549 PetscScalar value; 550 PetscErrorCode ierr; 551 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 552 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 553 PetscBool roworiented = aij->roworiented; 554 555 /* Some Variables required in the macro */ 556 Mat A = aij->A; 557 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 558 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 559 MatScalar *aa = a->a; 560 PetscBool ignorezeroentries = a->ignorezeroentries; 561 Mat B = aij->B; 562 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 563 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 564 MatScalar *ba = b->a; 565 566 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 567 PetscInt nonew; 568 MatScalar *ap1,*ap2; 569 570 PetscFunctionBegin; 571 for (i=0; i<m; i++) { 572 if (im[i] < 0) continue; 573 #if defined(PETSC_USE_DEBUG) 574 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 575 #endif 576 if (im[i] >= rstart && im[i] < rend) { 577 row = im[i] - rstart; 578 lastcol1 = -1; 579 rp1 = aj + ai[row]; 580 ap1 = aa + ai[row]; 581 rmax1 = aimax[row]; 582 nrow1 = ailen[row]; 583 low1 = 0; 584 high1 = nrow1; 585 lastcol2 = -1; 586 rp2 = bj + bi[row]; 587 ap2 = ba + bi[row]; 588 rmax2 = bimax[row]; 589 nrow2 = bilen[row]; 590 low2 = 0; 591 high2 = nrow2; 592 593 for (j=0; j<n; j++) { 594 if (roworiented) value = v[i*n+j]; 595 else value = v[i+j*m]; 596 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 597 if (in[j] >= cstart && in[j] < cend) { 598 col = in[j] - cstart; 599 nonew = a->nonew; 600 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 601 } else if (in[j] < 0) continue; 602 #if defined(PETSC_USE_DEBUG) 603 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 604 #endif 605 else { 606 if (mat->was_assembled) { 607 if (!aij->colmap) { 608 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 609 } 610 #if defined(PETSC_USE_CTABLE) 611 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 612 col--; 613 #else 614 col = aij->colmap[in[j]] - 1; 615 #endif 616 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 
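/*
   Reminder of the public path that reaches this routine; a hedged sketch (row, col and value are
   placeholders) showing that entries for rows owned by other processes are legal and are parked
   in the stash until assembly:

     ierr = MatSetValues(A,1,&row,1,&col,&value,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/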
617 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 618 col = in[j]; 619 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 620 B = aij->B; 621 b = (Mat_SeqAIJ*)B->data; 622 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 623 rp2 = bj + bi[row]; 624 ap2 = ba + bi[row]; 625 rmax2 = bimax[row]; 626 nrow2 = bilen[row]; 627 low2 = 0; 628 high2 = nrow2; 629 bm = aij->B->rmap->n; 630 ba = b->a; 631 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 632 } else col = in[j]; 633 nonew = b->nonew; 634 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 635 } 636 } 637 } else { 638 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 639 if (!aij->donotstash) { 640 mat->assembled = PETSC_FALSE; 641 if (roworiented) { 642 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 643 } else { 644 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 645 } 646 } 647 } 648 } 649 PetscFunctionReturn(0); 650 } 651 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatGetValues_MPIAIJ" 654 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 655 { 656 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 657 PetscErrorCode ierr; 658 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 659 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 660 661 PetscFunctionBegin; 662 for (i=0; i<m; i++) { 663 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 664 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 665 if (idxm[i] >= rstart && idxm[i] < rend) { 666 row = idxm[i] - rstart; 667 for (j=0; j<n; j++) { 668 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 669 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 670 if (idxn[j] >= cstart && idxn[j] < cend) { 671 col = idxn[j] - cstart; 672 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 673 } else { 674 if (!aij->colmap) { 675 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 676 } 677 #if defined(PETSC_USE_CTABLE) 678 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 679 col--; 680 #else 681 col = aij->colmap[idxn[j]] - 1; 682 #endif 683 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 684 else { 685 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 686 } 687 } 688 } 689 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 690 } 691 PetscFunctionReturn(0); 692 } 693 694 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 695 696 #undef __FUNCT__ 697 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 698 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 699 { 700 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 701 PetscErrorCode ierr; 702 PetscInt nstash,reallocs; 703 704 PetscFunctionBegin; 705 if (aij->donotstash || mat->nooffprocentries) 
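/*
   mat->nooffprocentries is driven by the MatOption named in the error message earlier in this
   file; a hedged sketch of how a caller that only ever sets locally owned rows can skip the
   stash/scatter machinery entirely:

     ierr = MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/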
PetscFunctionReturn(0); 706 707 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 708 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 709 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 710 PetscFunctionReturn(0); 711 } 712 713 #undef __FUNCT__ 714 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 715 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 716 { 717 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 718 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 719 PetscErrorCode ierr; 720 PetscMPIInt n; 721 PetscInt i,j,rstart,ncols,flg; 722 PetscInt *row,*col; 723 PetscBool other_disassembled; 724 PetscScalar *val; 725 726 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 727 728 PetscFunctionBegin; 729 if (!aij->donotstash && !mat->nooffprocentries) { 730 while (1) { 731 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 732 if (!flg) break; 733 734 for (i=0; i<n; ) { 735 /* Now identify the consecutive vals belonging to the same row */ 736 for (j=i,rstart=row[j]; j<n; j++) { 737 if (row[j] != rstart) break; 738 } 739 if (j < n) ncols = j-i; 740 else ncols = n-i; 741 /* Now assemble all these values with a single function call */ 742 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 743 744 i = j; 745 } 746 } 747 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 748 } 749 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 750 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 751 752 /* determine if any processor has disassembled, if so we must 753 also disassemble ourselfs, in order that we may reassemble. */ 754 /* 755 if nonzero structure of submatrix B cannot change then we know that 756 no processor disassembled thus we can skip this stuff 757 */ 758 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 759 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 760 if (mat->was_assembled && !other_disassembled) { 761 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 762 } 763 } 764 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 765 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 766 } 767 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 768 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 769 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 770 771 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 772 773 aij->rowvalues = 0; 774 775 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 776 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 777 778 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 779 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 780 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 781 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 782 } 783 PetscFunctionReturn(0); 784 } 785 786 #undef __FUNCT__ 787 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 788 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 789 { 790 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 791 PetscErrorCode ierr; 792 793 PetscFunctionBegin; 794 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 795 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 796 
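/*
   Hedged usage sketch for the row-zeroing path that follows (MatZeroRows_MPIAIJ); rows[] holds
   global row numbers and x/b may be NULL when no right-hand-side fix-up is wanted:

     ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);        place 1.0 on each zeroed diagonal and fix b
     ierr = MatZeroRows(A,nrows,rows,0.0,NULL,NULL);CHKERRQ(ierr);  zero the rows completely
*/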
PetscFunctionReturn(0); 797 } 798 799 #undef __FUNCT__ 800 #define __FUNCT__ "MatZeroRows_MPIAIJ" 801 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 802 { 803 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 804 PetscInt *lrows; 805 PetscInt r, len; 806 PetscErrorCode ierr; 807 808 PetscFunctionBegin; 809 /* get locally owned rows */ 810 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 811 /* fix right hand side if needed */ 812 if (x && b) { 813 const PetscScalar *xx; 814 PetscScalar *bb; 815 816 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 817 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 818 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 819 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 820 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 821 } 822 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 823 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 824 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 825 PetscBool cong; 826 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 827 if (cong) A->congruentlayouts = 1; 828 else A->congruentlayouts = 0; 829 } 830 if ((diag != 0.0) && A->congruentlayouts) { 831 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 832 } else if (diag != 0.0) { 833 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 834 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 835 for (r = 0; r < len; ++r) { 836 const PetscInt row = lrows[r] + A->rmap->rstart; 837 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 838 } 839 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 840 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 841 } else { 842 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 843 } 844 ierr = PetscFree(lrows);CHKERRQ(ierr); 845 846 /* only change matrix nonzero state if pattern was allowed to be changed */ 847 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 848 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 849 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 850 } 851 PetscFunctionReturn(0); 852 } 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 856 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 857 { 858 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 859 PetscErrorCode ierr; 860 PetscMPIInt n = A->rmap->n; 861 PetscInt i,j,r,m,p = 0,len = 0; 862 PetscInt *lrows,*owners = A->rmap->range; 863 PetscSFNode *rrows; 864 PetscSF sf; 865 const PetscScalar *xx; 866 PetscScalar *bb,*mask; 867 Vec xmask,lmask; 868 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 869 const PetscInt *aj, *ii,*ridx; 870 PetscScalar *aa; 871 872 PetscFunctionBegin; 873 /* Create SF where leaves are input rows and roots are owned rows */ 874 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 875 for (r = 0; r < n; ++r) lrows[r] = -1; 876 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 877 for (r = 0; r < N; ++r) { 878 const PetscInt idx = rows[r]; 879 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row 
%D out of range [0,%D)",idx,A->rmap->N); 880 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 881 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 882 } 883 rrows[r].rank = p; 884 rrows[r].index = rows[r] - owners[p]; 885 } 886 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 887 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 888 /* Collect flags for rows to be zeroed */ 889 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 890 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 891 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 892 /* Compress and put in row numbers */ 893 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 894 /* zero diagonal part of matrix */ 895 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 896 /* handle off diagonal part of matrix */ 897 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 898 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 899 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 900 for (i=0; i<len; i++) bb[lrows[i]] = 1; 901 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 902 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 903 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 904 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 905 if (x) { 906 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 907 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 909 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 910 } 911 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 912 /* remove zeroed rows of off diagonal matrix */ 913 ii = aij->i; 914 for (i=0; i<len; i++) { 915 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 916 } 917 /* loop over all elements of off process part of matrix zeroing removed columns*/ 918 if (aij->compressedrow.use) { 919 m = aij->compressedrow.nrows; 920 ii = aij->compressedrow.i; 921 ridx = aij->compressedrow.rindex; 922 for (i=0; i<m; i++) { 923 n = ii[i+1] - ii[i]; 924 aj = aij->j + ii[i]; 925 aa = aij->a + ii[i]; 926 927 for (j=0; j<n; j++) { 928 if (PetscAbsScalar(mask[*aj])) { 929 if (b) bb[*ridx] -= *aa*xx[*aj]; 930 *aa = 0.0; 931 } 932 aa++; 933 aj++; 934 } 935 ridx++; 936 } 937 } else { /* do not use compressed row format */ 938 m = l->B->rmap->n; 939 for (i=0; i<m; i++) { 940 n = ii[i+1] - ii[i]; 941 aj = aij->j + ii[i]; 942 aa = aij->a + ii[i]; 943 for (j=0; j<n; j++) { 944 if (PetscAbsScalar(mask[*aj])) { 945 if (b) bb[i] -= *aa*xx[*aj]; 946 *aa = 0.0; 947 } 948 aa++; 949 aj++; 950 } 951 } 952 } 953 if (x) { 954 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 955 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 956 } 957 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 958 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 959 ierr = PetscFree(lrows);CHKERRQ(ierr); 960 961 /* only change matrix nonzero state if pattern was allowed to be changed */ 962 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 963 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 964 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 965 } 966 PetscFunctionReturn(0); 967 } 968 969 #undef 
__FUNCT__ 970 #define __FUNCT__ "MatMult_MPIAIJ" 971 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 972 { 973 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 974 PetscErrorCode ierr; 975 PetscInt nt; 976 977 PetscFunctionBegin; 978 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 979 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 980 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 981 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 982 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 983 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } 986 987 #undef __FUNCT__ 988 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 989 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 990 { 991 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 992 PetscErrorCode ierr; 993 994 PetscFunctionBegin; 995 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 996 PetscFunctionReturn(0); 997 } 998 999 #undef __FUNCT__ 1000 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1001 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1002 { 1003 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1004 PetscErrorCode ierr; 1005 1006 PetscFunctionBegin; 1007 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1008 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1009 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1011 PetscFunctionReturn(0); 1012 } 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1016 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1017 { 1018 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1019 PetscErrorCode ierr; 1020 PetscBool merged; 1021 1022 PetscFunctionBegin; 1023 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1024 /* do nondiagonal part */ 1025 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1026 if (!merged) { 1027 /* send it on its way */ 1028 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1029 /* do local part */ 1030 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1031 /* receive remote parts: note this assumes the values are not actually */ 1032 /* added in yy until the next line, */ 1033 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1034 } else { 1035 /* do local part */ 1036 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1037 /* send it on its way */ 1038 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1039 /* values actually were received in the Begin() but we need to call this nop */ 1040 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1041 } 1042 PetscFunctionReturn(0); 1043 } 1044 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1047 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1048 { 1049 MPI_Comm comm; 1050 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1051 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1052 IS Me,Notme; 1053 PetscErrorCode ierr; 1054 PetscInt M,N,first,last,*notme,i; 1055 PetscMPIInt size; 1056 1057 PetscFunctionBegin; 1058 /* Easy test: symmetric diagonal block */ 1059 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1060 ierr = 
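/*
   Summary of the products implemented above, writing A_d for the diagonal block (a->A), A_o for
   the off-diagonal block (a->B), x_l for the locally owned part of x and x_g for the ghost values
   gathered into a->lvec:

     MatMult:           y = A_d x_l + A_o x_g
     MatMultAdd:        z = A_d x_l + A_o x_g + y
     MatMultTranspose:  y = A_d^T x_l plus the contributions A_o^T x_l scattered back with ADD_VALUES
*/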
MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1061 if (!*f) PetscFunctionReturn(0); 1062 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1063 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1064 if (size == 1) PetscFunctionReturn(0); 1065 1066 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1067 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1068 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1069 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1070 for (i=0; i<first; i++) notme[i] = i; 1071 for (i=last; i<M; i++) notme[i-last+first] = i; 1072 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1073 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1074 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1075 Aoff = Aoffs[0]; 1076 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1077 Boff = Boffs[0]; 1078 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1079 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1080 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1081 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1082 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1083 ierr = PetscFree(notme);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1089 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1090 { 1091 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1092 PetscErrorCode ierr; 1093 1094 PetscFunctionBegin; 1095 /* do nondiagonal part */ 1096 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1097 /* send it on its way */ 1098 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1099 /* do local part */ 1100 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1101 /* receive remote parts */ 1102 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 /* 1107 This only works correctly for square matrices where the subblock A->A is the 1108 diagonal block 1109 */ 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1112 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1113 { 1114 PetscErrorCode ierr; 1115 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1116 1117 PetscFunctionBegin; 1118 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1119 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1120 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #undef __FUNCT__ 1125 #define __FUNCT__ "MatScale_MPIAIJ" 1126 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1127 { 1128 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1129 PetscErrorCode ierr; 1130 1131 PetscFunctionBegin; 1132 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1133 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNCT__ 1138 #define __FUNCT__ "MatDestroy_MPIAIJ" 1139 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1140 { 1141 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1142 PetscErrorCode ierr; 1143 1144 PetscFunctionBegin; 1145 #if defined(PETSC_USE_LOG) 1146 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1147 #endif 
1148 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1149 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1150 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1151 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1152 #if defined(PETSC_USE_CTABLE) 1153 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1154 #else 1155 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1156 #endif 1157 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1158 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1159 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1160 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1161 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1162 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1163 1164 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1165 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1168 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1169 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1170 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1172 #if defined(PETSC_HAVE_ELEMENTAL) 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1174 #endif 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1180 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1181 { 1182 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1183 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1184 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1185 PetscErrorCode ierr; 1186 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1187 int fd; 1188 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1189 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1190 PetscScalar *column_values; 1191 PetscInt message_count,flowcontrolcount; 1192 FILE *file; 1193 1194 PetscFunctionBegin; 1195 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1196 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1197 nz = A->nz + B->nz; 1198 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1199 if (!rank) { 1200 header[0] = MAT_FILE_CLASSID; 1201 header[1] = mat->rmap->N; 1202 header[2] = mat->cmap->N; 1203 1204 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1205 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1206 /* get largest number of rows any processor has */ 1207 rlen = mat->rmap->n; 1208 range = mat->rmap->range; 1209 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1210 } else { 1211 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1212 rlen = mat->rmap->n; 1213 } 1214 1215 /* load up the local row counts */ 1216 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1217 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1218 1219 /* store the row lengths to the file */ 1220 ierr = 
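/*
   File layout produced below: a four-entry header (MAT_FILE_CLASSID, global rows, global columns,
   global nonzero count), then every row length, then every column index, then every numerical
   value; each section is funnelled to rank 0 one process at a time under the viewer flow control
   started here.
*/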
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1221 if (!rank) { 1222 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1223 for (i=1; i<size; i++) { 1224 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1225 rlen = range[i+1] - range[i]; 1226 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1227 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1228 } 1229 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1230 } else { 1231 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1232 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1233 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1234 } 1235 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1236 1237 /* load up the local column indices */ 1238 nzmax = nz; /* th processor needs space a largest processor needs */ 1239 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1240 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1241 cnt = 0; 1242 for (i=0; i<mat->rmap->n; i++) { 1243 for (j=B->i[i]; j<B->i[i+1]; j++) { 1244 if ((col = garray[B->j[j]]) > cstart) break; 1245 column_indices[cnt++] = col; 1246 } 1247 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1248 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1249 } 1250 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1251 1252 /* store the column indices to the file */ 1253 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1254 if (!rank) { 1255 MPI_Status status; 1256 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1257 for (i=1; i<size; i++) { 1258 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1259 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1260 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1261 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1262 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1263 } 1264 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1265 } else { 1266 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1267 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1268 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1269 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1270 } 1271 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1272 1273 /* load up the local column values */ 1274 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1275 cnt = 0; 1276 for (i=0; i<mat->rmap->n; i++) { 1277 for (j=B->i[i]; j<B->i[i+1]; j++) { 1278 if (garray[B->j[j]] > cstart) break; 1279 column_values[cnt++] = B->a[j]; 1280 } 1281 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1282 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1283 
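/*
   A hedged sketch of how this binary writer is usually driven, and of reading the result back
   ("matrix.dat" is a placeholder file name):

     PetscViewer viewer;
     Mat         B;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATAIJ);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/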
} 1284 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1285 1286 /* store the column values to the file */ 1287 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1288 if (!rank) { 1289 MPI_Status status; 1290 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1291 for (i=1; i<size; i++) { 1292 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1293 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1294 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1295 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1296 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1297 } 1298 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1299 } else { 1300 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1301 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1302 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1303 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1304 } 1305 ierr = PetscFree(column_values);CHKERRQ(ierr); 1306 1307 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1308 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1309 PetscFunctionReturn(0); 1310 } 1311 1312 #include <petscdraw.h> 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1315 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1316 { 1317 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1318 PetscErrorCode ierr; 1319 PetscMPIInt rank = aij->rank,size = aij->size; 1320 PetscBool isdraw,iascii,isbinary; 1321 PetscViewer sviewer; 1322 PetscViewerFormat format; 1323 1324 PetscFunctionBegin; 1325 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1326 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1327 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1328 if (iascii) { 1329 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1330 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1331 MatInfo info; 1332 PetscBool inodes; 1333 1334 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1335 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1336 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1337 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1338 if (!inodes) { 1339 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1340 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1341 } else { 1342 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1343 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1344 } 1345 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1346 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1347 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1348 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1349 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1350 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1351 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1352 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1353 PetscFunctionReturn(0); 1354 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1355 PetscInt inodecount,inodelimit,*inodes; 1356 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1357 if (inodes) { 1358 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1359 } else { 1360 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1361 } 1362 PetscFunctionReturn(0); 1363 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1364 PetscFunctionReturn(0); 1365 } 1366 } else if (isbinary) { 1367 if (size == 1) { 1368 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1369 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1370 } else { 1371 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1372 } 1373 PetscFunctionReturn(0); 1374 } else if (isdraw) { 1375 PetscDraw draw; 1376 PetscBool isnull; 1377 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1378 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1379 if (isnull) PetscFunctionReturn(0); 1380 } 1381 1382 { 1383 /* assemble the entire matrix onto first processor. 
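       This gather-to-rank-0 fallback serves the ASCII, socket and draw viewers; the usual way to
       reach it is MatView() with such a viewer, for example via the standard run-time options
       -mat_view or -mat_view draw (options-database keys, not taken from this file).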
*/ 1384 Mat A; 1385 Mat_SeqAIJ *Aloc; 1386 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1387 MatScalar *a; 1388 1389 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1390 if (!rank) { 1391 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1392 } else { 1393 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1394 } 1395 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1396 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1397 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1398 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1399 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1400 1401 /* copy over the A part */ 1402 Aloc = (Mat_SeqAIJ*)aij->A->data; 1403 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1404 row = mat->rmap->rstart; 1405 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1406 for (i=0; i<m; i++) { 1407 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1408 row++; 1409 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1410 } 1411 aj = Aloc->j; 1412 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1413 1414 /* copy over the B part */ 1415 Aloc = (Mat_SeqAIJ*)aij->B->data; 1416 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1417 row = mat->rmap->rstart; 1418 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1419 ct = cols; 1420 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1421 for (i=0; i<m; i++) { 1422 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1423 row++; 1424 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1425 } 1426 ierr = PetscFree(ct);CHKERRQ(ierr); 1427 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1428 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1429 /* 1430 Everyone has to call to draw the matrix since the graphics waits are 1431 synchronized across all processors that share the PetscDraw object 1432 */ 1433 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1434 if (!rank) { 1435 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1436 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1437 } 1438 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1439 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1440 ierr = MatDestroy(&A);CHKERRQ(ierr); 1441 } 1442 PetscFunctionReturn(0); 1443 } 1444 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatView_MPIAIJ" 1447 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1448 { 1449 PetscErrorCode ierr; 1450 PetscBool iascii,isdraw,issocket,isbinary; 1451 1452 PetscFunctionBegin; 1453 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1457 if (iascii || isdraw || isbinary || issocket) { 1458 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1459 } 1460 PetscFunctionReturn(0); 1461 } 1462 1463 #undef __FUNCT__ 1464 #define __FUNCT__ "MatSOR_MPIAIJ" 1465 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 
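/*
   Each parallel sweep below works only on the locally owned block: ghost values of xx are gathered
   into mat->lvec, the right-hand side is updated as bb1 = bb - B*lvec, and a sequential SOR sweep
   with the diagonal block (mat->A) is applied to bb1.  (Hedged note: this is the kernel that PCSOR
   ends up calling when the preconditioner matrix is MPIAIJ.)
*/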
1466 { 1467 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1468 PetscErrorCode ierr; 1469 Vec bb1 = 0; 1470 PetscBool hasop; 1471 1472 PetscFunctionBegin; 1473 if (flag == SOR_APPLY_UPPER) { 1474 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1475 PetscFunctionReturn(0); 1476 } 1477 1478 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1479 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1480 } 1481 1482 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1483 if (flag & SOR_ZERO_INITIAL_GUESS) { 1484 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1485 its--; 1486 } 1487 1488 while (its--) { 1489 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1490 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1491 1492 /* update rhs: bb1 = bb - B*x */ 1493 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1494 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1495 1496 /* local sweep */ 1497 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1498 } 1499 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1500 if (flag & SOR_ZERO_INITIAL_GUESS) { 1501 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1502 its--; 1503 } 1504 while (its--) { 1505 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1506 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1507 1508 /* update rhs: bb1 = bb - B*x */ 1509 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1510 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1511 1512 /* local sweep */ 1513 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1514 } 1515 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1516 if (flag & SOR_ZERO_INITIAL_GUESS) { 1517 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1518 its--; 1519 } 1520 while (its--) { 1521 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1522 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1523 1524 /* update rhs: bb1 = bb - B*x */ 1525 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1526 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1527 1528 /* local sweep */ 1529 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1530 } 1531 } else if (flag & SOR_EISENSTAT) { 1532 Vec xx1; 1533 1534 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1535 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1536 1537 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1538 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1539 if (!mat->diag) { 1540 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1541 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1542 } 1543 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1544 if (hasop) { 1545 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1546 } else { 1547 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1548 } 1549 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 
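    /*
       Eisenstat variant, local form: at this point bb1 = bb + ((omega-2.0)/omega)*D*xx,
       where D*xx came either from MatMultDiagonalBlock() or from the pointwise multiply
       with the stored diagonal above. The MatMultAdd() that follows adds the off-process
       coupling B*lvec (lvec holds the freshly scattered ghost values of xx), and the
       forward local sweep below then uses bb1 as its right-hand side, with the result
       accumulated into xx.
    */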
1550 1551 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1552 1553 /* local sweep */ 1554 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1555 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1556 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1557 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1558 1559 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1560 1561 matin->factorerrortype = mat->A->factorerrortype; 1562 PetscFunctionReturn(0); 1563 } 1564 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatPermute_MPIAIJ" 1567 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1568 { 1569 Mat aA,aB,Aperm; 1570 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1571 PetscScalar *aa,*ba; 1572 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1573 PetscSF rowsf,sf; 1574 IS parcolp = NULL; 1575 PetscBool done; 1576 PetscErrorCode ierr; 1577 1578 PetscFunctionBegin; 1579 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1580 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1581 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1582 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1583 1584 /* Invert row permutation to find out where my rows should go */ 1585 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1586 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1587 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1588 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1589 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1590 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1591 1592 /* Invert column permutation to find out where my columns should go */ 1593 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1594 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1595 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1596 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1597 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1598 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1599 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1600 1601 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1602 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1603 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1604 1605 /* Find out where my gcols should go */ 1606 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1607 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1608 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1609 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1610 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1611 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1612 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1613 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1614 1615 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1616 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1617 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1618 for (i=0; i<m; i++) { 1619 PetscInt row = rdest[i],rowner; 1620 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1621 for 
(j=ai[i]; j<ai[i+1]; j++) { 1622 PetscInt cowner,col = cdest[aj[j]]; 1623 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1624 if (rowner == cowner) dnnz[i]++; 1625 else onnz[i]++; 1626 } 1627 for (j=bi[i]; j<bi[i+1]; j++) { 1628 PetscInt cowner,col = gcdest[bj[j]]; 1629 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1630 if (rowner == cowner) dnnz[i]++; 1631 else onnz[i]++; 1632 } 1633 } 1634 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1635 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1636 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1637 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1638 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1639 1640 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1641 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1642 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1643 for (i=0; i<m; i++) { 1644 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1645 PetscInt j0,rowlen; 1646 rowlen = ai[i+1] - ai[i]; 1647 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1648 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1649 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1650 } 1651 rowlen = bi[i+1] - bi[i]; 1652 for (j0=j=0; j<rowlen; j0=j) { 1653 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1654 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1655 } 1656 } 1657 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1658 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1659 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1660 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1661 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1662 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1663 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1664 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1665 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1666 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1667 *B = Aperm; 1668 PetscFunctionReturn(0); 1669 } 1670 1671 #undef __FUNCT__ 1672 #define __FUNCT__ "MatGetGhosts_MPIAIJ" 1673 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1674 { 1675 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1676 PetscErrorCode ierr; 1677 1678 PetscFunctionBegin; 1679 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1680 if (ghosts) *ghosts = aij->garray; 1681 PetscFunctionReturn(0); 1682 } 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1686 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1687 { 1688 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1689 Mat A = mat->A,B = mat->B; 1690 PetscErrorCode ierr; 1691 PetscReal isend[5],irecv[5]; 1692 1693 PetscFunctionBegin; 1694 info->block_size = 1.0; 1695 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1696 1697 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1698 isend[3] = info->memory; isend[4] = info->mallocs; 1699 1700 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1701 1702 isend[0] += 
info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1703 isend[3] += info->memory; isend[4] += info->mallocs; 1704 if (flag == MAT_LOCAL) { 1705 info->nz_used = isend[0]; 1706 info->nz_allocated = isend[1]; 1707 info->nz_unneeded = isend[2]; 1708 info->memory = isend[3]; 1709 info->mallocs = isend[4]; 1710 } else if (flag == MAT_GLOBAL_MAX) { 1711 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1712 1713 info->nz_used = irecv[0]; 1714 info->nz_allocated = irecv[1]; 1715 info->nz_unneeded = irecv[2]; 1716 info->memory = irecv[3]; 1717 info->mallocs = irecv[4]; 1718 } else if (flag == MAT_GLOBAL_SUM) { 1719 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1720 1721 info->nz_used = irecv[0]; 1722 info->nz_allocated = irecv[1]; 1723 info->nz_unneeded = irecv[2]; 1724 info->memory = irecv[3]; 1725 info->mallocs = irecv[4]; 1726 } 1727 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1728 info->fill_ratio_needed = 0; 1729 info->factor_mallocs = 0; 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatSetOption_MPIAIJ" 1735 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1736 { 1737 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1738 PetscErrorCode ierr; 1739 1740 PetscFunctionBegin; 1741 switch (op) { 1742 case MAT_NEW_NONZERO_LOCATIONS: 1743 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1744 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1745 case MAT_KEEP_NONZERO_PATTERN: 1746 case MAT_NEW_NONZERO_LOCATION_ERR: 1747 case MAT_USE_INODES: 1748 case MAT_IGNORE_ZERO_ENTRIES: 1749 MatCheckPreallocated(A,1); 1750 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1751 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1752 break; 1753 case MAT_ROW_ORIENTED: 1754 MatCheckPreallocated(A,1); 1755 a->roworiented = flg; 1756 1757 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1758 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1759 break; 1760 case MAT_NEW_DIAGONALS: 1761 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1762 break; 1763 case MAT_IGNORE_OFF_PROC_ENTRIES: 1764 a->donotstash = flg; 1765 break; 1766 case MAT_SPD: 1767 A->spd_set = PETSC_TRUE; 1768 A->spd = flg; 1769 if (flg) { 1770 A->symmetric = PETSC_TRUE; 1771 A->structurally_symmetric = PETSC_TRUE; 1772 A->symmetric_set = PETSC_TRUE; 1773 A->structurally_symmetric_set = PETSC_TRUE; 1774 } 1775 break; 1776 case MAT_SYMMETRIC: 1777 MatCheckPreallocated(A,1); 1778 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1779 break; 1780 case MAT_STRUCTURALLY_SYMMETRIC: 1781 MatCheckPreallocated(A,1); 1782 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1783 break; 1784 case MAT_HERMITIAN: 1785 MatCheckPreallocated(A,1); 1786 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1787 break; 1788 case MAT_SYMMETRY_ETERNAL: 1789 MatCheckPreallocated(A,1); 1790 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1791 break; 1792 default: 1793 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1794 } 1795 PetscFunctionReturn(0); 1796 } 1797 1798 #undef __FUNCT__ 1799 #define __FUNCT__ "MatGetRow_MPIAIJ" 1800 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1801 { 1802 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1803 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1804 PetscErrorCode ierr; 1805 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1806 PetscInt nztot,nzA,nzB,lrow,rstart 
= matin->rmap->rstart,rend = matin->rmap->rend; 1807 PetscInt *cmap,*idx_p; 1808 1809 PetscFunctionBegin; 1810 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1811 mat->getrowactive = PETSC_TRUE; 1812 1813 if (!mat->rowvalues && (idx || v)) { 1814 /* 1815 allocate enough space to hold information from the longest row. 1816 */ 1817 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1818 PetscInt max = 1,tmp; 1819 for (i=0; i<matin->rmap->n; i++) { 1820 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1821 if (max < tmp) max = tmp; 1822 } 1823 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1824 } 1825 1826 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1827 lrow = row - rstart; 1828 1829 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1830 if (!v) {pvA = 0; pvB = 0;} 1831 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1832 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1833 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1834 nztot = nzA + nzB; 1835 1836 cmap = mat->garray; 1837 if (v || idx) { 1838 if (nztot) { 1839 /* Sort by increasing column numbers, assuming A and B already sorted */ 1840 PetscInt imark = -1; 1841 if (v) { 1842 *v = v_p = mat->rowvalues; 1843 for (i=0; i<nzB; i++) { 1844 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1845 else break; 1846 } 1847 imark = i; 1848 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1849 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1850 } 1851 if (idx) { 1852 *idx = idx_p = mat->rowindices; 1853 if (imark > -1) { 1854 for (i=0; i<imark; i++) { 1855 idx_p[i] = cmap[cworkB[i]]; 1856 } 1857 } else { 1858 for (i=0; i<nzB; i++) { 1859 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1860 else break; 1861 } 1862 imark = i; 1863 } 1864 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1865 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1866 } 1867 } else { 1868 if (idx) *idx = 0; 1869 if (v) *v = 0; 1870 } 1871 } 1872 *nz = nztot; 1873 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1874 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1875 PetscFunctionReturn(0); 1876 } 1877 1878 #undef __FUNCT__ 1879 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1880 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1881 { 1882 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1883 1884 PetscFunctionBegin; 1885 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1886 aij->getrowactive = PETSC_FALSE; 1887 PetscFunctionReturn(0); 1888 } 1889 1890 #undef __FUNCT__ 1891 #define __FUNCT__ "MatNorm_MPIAIJ" 1892 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1893 { 1894 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1895 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1896 PetscErrorCode ierr; 1897 PetscInt i,j,cstart = mat->cmap->rstart; 1898 PetscReal sum = 0.0; 1899 MatScalar *v; 1900 1901 PetscFunctionBegin; 1902 if (aij->size == 1) { 1903 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1904 } else { 1905 if (type == NORM_FROBENIUS) { 1906 v = amat->a; 1907 for (i=0; i<amat->nz; i++) { 1908 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1909 } 1910 v = bmat->a; 1911 for (i=0; i<bmat->nz; i++) { 1912 sum += 
PetscRealPart(PetscConj(*v)*(*v)); v++; 1913 } 1914 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1915 *norm = PetscSqrtReal(*norm); 1916 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1917 } else if (type == NORM_1) { /* max column norm */ 1918 PetscReal *tmp,*tmp2; 1919 PetscInt *jj,*garray = aij->garray; 1920 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1921 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1922 *norm = 0.0; 1923 v = amat->a; jj = amat->j; 1924 for (j=0; j<amat->nz; j++) { 1925 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1926 } 1927 v = bmat->a; jj = bmat->j; 1928 for (j=0; j<bmat->nz; j++) { 1929 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1930 } 1931 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1932 for (j=0; j<mat->cmap->N; j++) { 1933 if (tmp2[j] > *norm) *norm = tmp2[j]; 1934 } 1935 ierr = PetscFree(tmp);CHKERRQ(ierr); 1936 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1937 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1938 } else if (type == NORM_INFINITY) { /* max row norm */ 1939 PetscReal ntemp = 0.0; 1940 for (j=0; j<aij->A->rmap->n; j++) { 1941 v = amat->a + amat->i[j]; 1942 sum = 0.0; 1943 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1944 sum += PetscAbsScalar(*v); v++; 1945 } 1946 v = bmat->a + bmat->i[j]; 1947 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1948 sum += PetscAbsScalar(*v); v++; 1949 } 1950 if (sum > ntemp) ntemp = sum; 1951 } 1952 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1953 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1954 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1955 } 1956 PetscFunctionReturn(0); 1957 } 1958 1959 #undef __FUNCT__ 1960 #define __FUNCT__ "MatTranspose_MPIAIJ" 1961 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1962 { 1963 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1964 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1965 PetscErrorCode ierr; 1966 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1967 PetscInt cstart = A->cmap->rstart,ncol; 1968 Mat B; 1969 MatScalar *array; 1970 1971 PetscFunctionBegin; 1972 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1973 1974 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1975 ai = Aloc->i; aj = Aloc->j; 1976 bi = Bloc->i; bj = Bloc->j; 1977 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1978 PetscInt *d_nnz,*g_nnz,*o_nnz; 1979 PetscSFNode *oloc; 1980 PETSC_UNUSED PetscSF sf; 1981 1982 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1983 /* compute d_nnz for preallocation */ 1984 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1985 for (i=0; i<ai[ma]; i++) { 1986 d_nnz[aj[i]]++; 1987 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1988 } 1989 /* compute local off-diagonal contributions */ 1990 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1991 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1992 /* map those to global */ 1993 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1994 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1995 
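    /*
       The SF built above has one leaf per ghost column of A (a->garray[] gives its
       global column index), rooted on the process that owns that column in A->cmap,
       i.e. the process that owns the corresponding row of the transpose. The reduce
       below therefore sums the per-ghost-column counts g_nnz[] into o_nnz[] on the
       owning processes, yielding the off-diagonal preallocation for B = A^T.
    */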
ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1996 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1997 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1998 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1999 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2000 2001 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2002 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2003 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2004 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2005 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2006 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2007 } else { 2008 B = *matout; 2009 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2010 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2011 } 2012 2013 /* copy over the A part */ 2014 array = Aloc->a; 2015 row = A->rmap->rstart; 2016 for (i=0; i<ma; i++) { 2017 ncol = ai[i+1]-ai[i]; 2018 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2019 row++; 2020 array += ncol; aj += ncol; 2021 } 2022 aj = Aloc->j; 2023 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2024 2025 /* copy over the B part */ 2026 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2027 array = Bloc->a; 2028 row = A->rmap->rstart; 2029 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2030 cols_tmp = cols; 2031 for (i=0; i<mb; i++) { 2032 ncol = bi[i+1]-bi[i]; 2033 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2034 row++; 2035 array += ncol; cols_tmp += ncol; 2036 } 2037 ierr = PetscFree(cols);CHKERRQ(ierr); 2038 2039 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2040 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2041 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2042 *matout = B; 2043 } else { 2044 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2045 } 2046 PetscFunctionReturn(0); 2047 } 2048 2049 #undef __FUNCT__ 2050 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2051 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2052 { 2053 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2054 Mat a = aij->A,b = aij->B; 2055 PetscErrorCode ierr; 2056 PetscInt s1,s2,s3; 2057 2058 PetscFunctionBegin; 2059 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2060 if (rr) { 2061 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2062 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2063 /* Overlap communication with computation. 
*/ 2064 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2065 } 2066 if (ll) { 2067 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2068 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2069 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2070 } 2071 /* scale the diagonal block */ 2072 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2073 2074 if (rr) { 2075 /* Do a scatter end and then right scale the off-diagonal block */ 2076 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2077 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2078 } 2079 PetscFunctionReturn(0); 2080 } 2081 2082 #undef __FUNCT__ 2083 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2084 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2085 { 2086 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2087 PetscErrorCode ierr; 2088 2089 PetscFunctionBegin; 2090 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2091 PetscFunctionReturn(0); 2092 } 2093 2094 #undef __FUNCT__ 2095 #define __FUNCT__ "MatEqual_MPIAIJ" 2096 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2097 { 2098 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2099 Mat a,b,c,d; 2100 PetscBool flg; 2101 PetscErrorCode ierr; 2102 2103 PetscFunctionBegin; 2104 a = matA->A; b = matA->B; 2105 c = matB->A; d = matB->B; 2106 2107 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2108 if (flg) { 2109 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2110 } 2111 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2112 PetscFunctionReturn(0); 2113 } 2114 2115 #undef __FUNCT__ 2116 #define __FUNCT__ "MatCopy_MPIAIJ" 2117 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2118 { 2119 PetscErrorCode ierr; 2120 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2121 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2122 2123 PetscFunctionBegin; 2124 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2125 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2126 /* because of the column compression in the off-processor part of the matrix a->B, 2127 the number of columns in a->B and b->B may be different, hence we cannot call 2128 the MatCopy() directly on the two parts. If need be, we can provide a more 2129 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2130 then copying the submatrices */ 2131 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2132 } else { 2133 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2134 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2135 } 2136 PetscFunctionReturn(0); 2137 } 2138 2139 #undef __FUNCT__ 2140 #define __FUNCT__ "MatSetUp_MPIAIJ" 2141 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2142 { 2143 PetscErrorCode ierr; 2144 2145 PetscFunctionBegin; 2146 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2147 PetscFunctionReturn(0); 2148 } 2149 2150 /* 2151 Computes the number of nonzeros per row needed for preallocation when X and Y 2152 have different nonzero structure. 
2153 */ 2154 #undef __FUNCT__ 2155 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2156 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2157 { 2158 PetscInt i,j,k,nzx,nzy; 2159 2160 PetscFunctionBegin; 2161 /* Set the number of nonzeros in the new matrix */ 2162 for (i=0; i<m; i++) { 2163 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2164 nzx = xi[i+1] - xi[i]; 2165 nzy = yi[i+1] - yi[i]; 2166 nnz[i] = 0; 2167 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2168 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2169 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2170 nnz[i]++; 2171 } 2172 for (; k<nzy; k++) nnz[i]++; 2173 } 2174 PetscFunctionReturn(0); 2175 } 2176 2177 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2178 #undef __FUNCT__ 2179 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2180 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2181 { 2182 PetscErrorCode ierr; 2183 PetscInt m = Y->rmap->N; 2184 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2185 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2186 2187 PetscFunctionBegin; 2188 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2189 PetscFunctionReturn(0); 2190 } 2191 2192 #undef __FUNCT__ 2193 #define __FUNCT__ "MatAXPY_MPIAIJ" 2194 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2195 { 2196 PetscErrorCode ierr; 2197 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2198 PetscBLASInt bnz,one=1; 2199 Mat_SeqAIJ *x,*y; 2200 2201 PetscFunctionBegin; 2202 if (str == SAME_NONZERO_PATTERN) { 2203 PetscScalar alpha = a; 2204 x = (Mat_SeqAIJ*)xx->A->data; 2205 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2206 y = (Mat_SeqAIJ*)yy->A->data; 2207 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2208 x = (Mat_SeqAIJ*)xx->B->data; 2209 y = (Mat_SeqAIJ*)yy->B->data; 2210 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2211 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2212 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2213 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2214 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2215 } else { 2216 Mat B; 2217 PetscInt *nnz_d,*nnz_o; 2218 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2219 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2220 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2221 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2222 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2223 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2224 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2225 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2226 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2227 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2228 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2229 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2230 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2231 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2232 } 2233 
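  /*
     Worked example for the merge count used above (MatAXPYGetPreallocation_MPIX_private):
     if a row of X has global columns {0,3,7} and the same row of Y has {3,5}, the merge
     counts 0, then 3 (the duplicate in Y is skipped), then 5 while catching up to 7,
     then 7, and has nothing left to drain, giving nnz = |{0,3,5,7}| = 4 for that row
     of Y + a*X.
  */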
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
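  /*
     A minimal usage sketch for the row-extrema routines above (MatGetRowMaxAbs etc.),
     assuming an assembled MPIAIJ matrix A and a local row count m obtained from
     MatGetLocalSize(A,&m,NULL); the variable names are illustrative only:

        Vec      rowmax;
        PetscInt *loc;
        ierr = MatCreateVecs(A,NULL,&rowmax);CHKERRQ(ierr);
        ierr = PetscMalloc1(m,&loc);CHKERRQ(ierr);
        ierr = MatGetRowMaxAbs(A,rowmax,loc);CHKERRQ(ierr);
        ierr = PetscFree(loc);CHKERRQ(ierr);
        ierr = VecDestroy(&rowmax);CHKERRQ(ierr);

     On return rowmax holds the entry of largest magnitude in each local row and
     loc[] its global column index.
  */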
2448 Mat *dummy; 2449 2450 PetscFunctionBegin; 2451 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2452 *newmat = *dummy; 2453 ierr = PetscFree(dummy);CHKERRQ(ierr); 2454 PetscFunctionReturn(0); 2455 } 2456 2457 #undef __FUNCT__ 2458 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2459 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2460 { 2461 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2462 PetscErrorCode ierr; 2463 2464 PetscFunctionBegin; 2465 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2466 A->factorerrortype = a->A->factorerrortype; 2467 PetscFunctionReturn(0); 2468 } 2469 2470 #undef __FUNCT__ 2471 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2472 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2473 { 2474 PetscErrorCode ierr; 2475 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2476 2477 PetscFunctionBegin; 2478 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2479 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2480 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2481 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2482 PetscFunctionReturn(0); 2483 } 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ" 2487 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2488 { 2489 PetscFunctionBegin; 2490 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2491 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2492 PetscFunctionReturn(0); 2493 } 2494 2495 #undef __FUNCT__ 2496 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap" 2497 /*@ 2498 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2499 2500 Collective on Mat 2501 2502 Input Parameters: 2503 + A - the matrix 2504 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2505 2506 Level: advanced 2507 2508 @*/ 2509 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2510 { 2511 PetscErrorCode ierr; 2512 2513 PetscFunctionBegin; 2514 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2515 PetscFunctionReturn(0); 2516 } 2517 2518 #undef __FUNCT__ 2519 #define __FUNCT__ "MatSetFromOptions_MPIAIJ" 2520 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2521 { 2522 PetscErrorCode ierr; 2523 PetscBool sc = PETSC_FALSE,flg; 2524 2525 PetscFunctionBegin; 2526 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2527 ierr = PetscObjectOptionsBegin((PetscObject)A); 2528 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2529 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2530 if (flg) { 2531 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2532 } 2533 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2534 PetscFunctionReturn(0); 2535 } 2536 2537 #undef __FUNCT__ 2538 #define __FUNCT__ "MatShift_MPIAIJ" 2539 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2540 { 2541 PetscErrorCode ierr; 2542 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2543 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2544 2545 PetscFunctionBegin; 2546 if (!Y->preallocated) { 2547 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2548 } else if 
(!aij->nz) { 2549 PetscInt nonew = aij->nonew; 2550 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2551 aij->nonew = nonew; 2552 } 2553 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2554 PetscFunctionReturn(0); 2555 } 2556 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ" 2559 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2560 { 2561 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2562 PetscErrorCode ierr; 2563 2564 PetscFunctionBegin; 2565 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2566 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2567 if (d) { 2568 PetscInt rstart; 2569 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2570 *d += rstart; 2571 2572 } 2573 PetscFunctionReturn(0); 2574 } 2575 2576 2577 /* -------------------------------------------------------------------*/ 2578 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2579 MatGetRow_MPIAIJ, 2580 MatRestoreRow_MPIAIJ, 2581 MatMult_MPIAIJ, 2582 /* 4*/ MatMultAdd_MPIAIJ, 2583 MatMultTranspose_MPIAIJ, 2584 MatMultTransposeAdd_MPIAIJ, 2585 0, 2586 0, 2587 0, 2588 /*10*/ 0, 2589 0, 2590 0, 2591 MatSOR_MPIAIJ, 2592 MatTranspose_MPIAIJ, 2593 /*15*/ MatGetInfo_MPIAIJ, 2594 MatEqual_MPIAIJ, 2595 MatGetDiagonal_MPIAIJ, 2596 MatDiagonalScale_MPIAIJ, 2597 MatNorm_MPIAIJ, 2598 /*20*/ MatAssemblyBegin_MPIAIJ, 2599 MatAssemblyEnd_MPIAIJ, 2600 MatSetOption_MPIAIJ, 2601 MatZeroEntries_MPIAIJ, 2602 /*24*/ MatZeroRows_MPIAIJ, 2603 0, 2604 0, 2605 0, 2606 0, 2607 /*29*/ MatSetUp_MPIAIJ, 2608 0, 2609 0, 2610 MatGetDiagonalBlock_MPIAIJ, 2611 0, 2612 /*34*/ MatDuplicate_MPIAIJ, 2613 0, 2614 0, 2615 0, 2616 0, 2617 /*39*/ MatAXPY_MPIAIJ, 2618 MatGetSubMatrices_MPIAIJ, 2619 MatIncreaseOverlap_MPIAIJ, 2620 MatGetValues_MPIAIJ, 2621 MatCopy_MPIAIJ, 2622 /*44*/ MatGetRowMax_MPIAIJ, 2623 MatScale_MPIAIJ, 2624 MatShift_MPIAIJ, 2625 MatDiagonalSet_MPIAIJ, 2626 MatZeroRowsColumns_MPIAIJ, 2627 /*49*/ MatSetRandom_MPIAIJ, 2628 0, 2629 0, 2630 0, 2631 0, 2632 /*54*/ MatFDColoringCreate_MPIXAIJ, 2633 0, 2634 MatSetUnfactored_MPIAIJ, 2635 MatPermute_MPIAIJ, 2636 0, 2637 /*59*/ MatGetSubMatrix_MPIAIJ, 2638 MatDestroy_MPIAIJ, 2639 MatView_MPIAIJ, 2640 0, 2641 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2642 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2643 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2644 0, 2645 0, 2646 0, 2647 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2648 MatGetRowMinAbs_MPIAIJ, 2649 0, 2650 MatSetLateBlockSizes_MPIAIJ, 2651 0, 2652 0, 2653 /*75*/ MatFDColoringApply_AIJ, 2654 MatSetFromOptions_MPIAIJ, 2655 0, 2656 0, 2657 MatFindZeroDiagonals_MPIAIJ, 2658 /*80*/ 0, 2659 0, 2660 0, 2661 /*83*/ MatLoad_MPIAIJ, 2662 0, 2663 0, 2664 0, 2665 0, 2666 0, 2667 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2668 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2669 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2670 MatPtAP_MPIAIJ_MPIAIJ, 2671 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2672 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2673 0, 2674 0, 2675 0, 2676 0, 2677 /*99*/ 0, 2678 0, 2679 0, 2680 MatConjugate_MPIAIJ, 2681 0, 2682 /*104*/MatSetValuesRow_MPIAIJ, 2683 MatRealPart_MPIAIJ, 2684 MatImaginaryPart_MPIAIJ, 2685 0, 2686 0, 2687 /*109*/0, 2688 0, 2689 MatGetRowMin_MPIAIJ, 2690 0, 2691 MatMissingDiagonal_MPIAIJ, 2692 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2693 0, 2694 MatGetGhosts_MPIAIJ, 2695 0, 2696 0, 2697 /*119*/0, 2698 0, 2699 0, 2700 0, 2701 MatGetMultiProcBlock_MPIAIJ, 2702 /*124*/MatFindNonzeroRows_MPIAIJ, 2703 MatGetColumnNorms_MPIAIJ, 2704 
MatInvertBlockDiagonal_MPIAIJ, 2705 0, 2706 MatGetSubMatricesMPI_MPIAIJ, 2707 /*129*/0, 2708 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2709 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2710 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2711 0, 2712 /*134*/0, 2713 0, 2714 0, 2715 0, 2716 0, 2717 /*139*/0, 2718 0, 2719 0, 2720 MatFDColoringSetUp_MPIXAIJ, 2721 MatFindOffBlockDiagonalEntries_MPIAIJ, 2722 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2723 }; 2724 2725 /* ----------------------------------------------------------------------------------------*/ 2726 2727 #undef __FUNCT__ 2728 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2729 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2730 { 2731 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2732 PetscErrorCode ierr; 2733 2734 PetscFunctionBegin; 2735 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2736 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2737 PetscFunctionReturn(0); 2738 } 2739 2740 #undef __FUNCT__ 2741 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2742 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2743 { 2744 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2745 PetscErrorCode ierr; 2746 2747 PetscFunctionBegin; 2748 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2749 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2750 PetscFunctionReturn(0); 2751 } 2752 2753 #undef __FUNCT__ 2754 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2755 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2756 { 2757 Mat_MPIAIJ *b; 2758 PetscErrorCode ierr; 2759 2760 PetscFunctionBegin; 2761 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2762 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2763 b = (Mat_MPIAIJ*)B->data; 2764 2765 if (!B->preallocated) { 2766 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 2767 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2768 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2769 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2770 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2771 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2772 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2773 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2774 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2775 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2776 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2777 } 2778 2779 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2780 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2781 B->preallocated = PETSC_TRUE; 2782 PetscFunctionReturn(0); 2783 } 2784 2785 #undef __FUNCT__ 2786 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2787 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2788 { 2789 Mat mat; 2790 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2791 PetscErrorCode ierr; 2792 2793 PetscFunctionBegin; 2794 *newmat = 0; 2795 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2796 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2797 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2798 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2799 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2800 a = (Mat_MPIAIJ*)mat->data; 2801 2802 mat->factortype = matin->factortype; 2803 mat->assembled = PETSC_TRUE; 2804 mat->insertmode = NOT_SET_VALUES; 2805 mat->preallocated = PETSC_TRUE; 2806 2807 a->size = oldmat->size; 2808 a->rank = oldmat->rank; 2809 a->donotstash = oldmat->donotstash; 2810 a->roworiented = oldmat->roworiented; 2811 a->rowindices = 0; 2812 a->rowvalues = 0; 2813 a->getrowactive = PETSC_FALSE; 2814 2815 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2816 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2817 2818 if (oldmat->colmap) { 2819 #if defined(PETSC_USE_CTABLE) 2820 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2821 #else 2822 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2823 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2824 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2825 #endif 2826 } else a->colmap = 0; 2827 if (oldmat->garray) { 2828 PetscInt len; 2829 len = oldmat->B->cmap->n; 2830 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2831 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2832 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2833 } else a->garray = 0; 2834 2835 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2836 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2837 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2838 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2839 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2840 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2841 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2842 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2843 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2844 *newmat = mat; 2845 PetscFunctionReturn(0); 2846 } 2847 2848 2849 2850 #undef __FUNCT__ 2851 #define __FUNCT__ "MatLoad_MPIAIJ" 2852 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2853 { 2854 PetscScalar *vals,*svals; 2855 MPI_Comm comm; 2856 PetscErrorCode ierr; 2857 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2858 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2859 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2860 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2861 PetscInt cend,cstart,n,*rowners; 2862 int fd; 2863 PetscInt bs = newMat->rmap->bs; 2864 2865 PetscFunctionBegin; 2866 /* force binary viewer to load .info file if it has not yet done so */ 2867 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2868 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2869 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2870 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2871 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2872 if (!rank) { 2873 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2874 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2875 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2876 } 2877 2878 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2879 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2880 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2881 if (bs < 0) bs = 1; 2882 2883 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2884 M = header[1]; N = header[2]; 2885 2886 /* If global sizes are set, check if they are consistent with that given in the file */ 2887 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2888 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2889 2890 /* determine ownership of all (block) rows */ 2891 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2892 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2893 else m = newMat->rmap->n; /* Set by user */ 2894 2895 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2896 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2897 2898 /* First process needs enough room for process with most rows */ 2899 if (!rank) { 2900 mmax = rowners[1]; 2901 for (i=2; i<=size; i++) { 2902 mmax = PetscMax(mmax, rowners[i]); 2903 } 2904 } else mmax = -1; /* unused, but compilers complain */ 2905 2906 rowners[0] = 0; 2907 for (i=2; i<=size; i++) { 2908 rowners[i] += rowners[i-1]; 2909 } 2910 rstart = rowners[rank]; 2911 rend = rowners[rank+1]; 2912 2913 /* distribute row lengths to all processors */ 2914 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2915 if (!rank) { 2916 ierr = 
PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2917 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2918 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2919 for (j=0; j<m; j++) { 2920 procsnz[0] += ourlens[j]; 2921 } 2922 for (i=1; i<size; i++) { 2923 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2924 /* calculate the number of nonzeros on each processor */ 2925 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2926 procsnz[i] += rowlengths[j]; 2927 } 2928 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2929 } 2930 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2931 } else { 2932 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2933 } 2934 2935 if (!rank) { 2936 /* determine max buffer needed and allocate it */ 2937 maxnz = 0; 2938 for (i=0; i<size; i++) { 2939 maxnz = PetscMax(maxnz,procsnz[i]); 2940 } 2941 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2942 2943 /* read in my part of the matrix column indices */ 2944 nz = procsnz[0]; 2945 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2946 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2947 2948 /* read in every one elses and ship off */ 2949 for (i=1; i<size; i++) { 2950 nz = procsnz[i]; 2951 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2952 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2953 } 2954 ierr = PetscFree(cols);CHKERRQ(ierr); 2955 } else { 2956 /* determine buffer space needed for message */ 2957 nz = 0; 2958 for (i=0; i<m; i++) { 2959 nz += ourlens[i]; 2960 } 2961 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2962 2963 /* receive message of column indices*/ 2964 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2965 } 2966 2967 /* determine column ownership if matrix is not square */ 2968 if (N != M) { 2969 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2970 else n = newMat->cmap->n; 2971 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2972 cstart = cend - n; 2973 } else { 2974 cstart = rstart; 2975 cend = rend; 2976 n = cend - cstart; 2977 } 2978 2979 /* loop over local rows, determining number of off diagonal entries */ 2980 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2981 jj = 0; 2982 for (i=0; i<m; i++) { 2983 for (j=0; j<ourlens[i]; j++) { 2984 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2985 jj++; 2986 } 2987 } 2988 2989 for (i=0; i<m; i++) { 2990 ourlens[i] -= offlens[i]; 2991 } 2992 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2993 2994 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2995 2996 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2997 2998 for (i=0; i<m; i++) { 2999 ourlens[i] += offlens[i]; 3000 } 3001 3002 if (!rank) { 3003 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3004 3005 /* read in my part of the matrix numerical values */ 3006 nz = procsnz[0]; 3007 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3008 3009 /* insert into matrix */ 3010 jj = rstart; 3011 smycols = mycols; 3012 svals = vals; 3013 for (i=0; i<m; i++) { 3014 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3015 smycols += ourlens[i]; 3016 svals += ourlens[i]; 3017 jj++; 3018 } 3019 3020 /* read in other processors and ship out */ 3021 for (i=1; i<size; i++) { 3022 nz = procsnz[i]; 3023 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3024 ierr = 
MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3025 } 3026 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3027 } else { 3028 /* receive numeric values */ 3029 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3030 3031 /* receive message of values*/ 3032 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3033 3034 /* insert into matrix */ 3035 jj = rstart; 3036 smycols = mycols; 3037 svals = vals; 3038 for (i=0; i<m; i++) { 3039 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3040 smycols += ourlens[i]; 3041 svals += ourlens[i]; 3042 jj++; 3043 } 3044 } 3045 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3046 ierr = PetscFree(vals);CHKERRQ(ierr); 3047 ierr = PetscFree(mycols);CHKERRQ(ierr); 3048 ierr = PetscFree(rowners);CHKERRQ(ierr); 3049 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3050 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3051 PetscFunctionReturn(0); 3052 } 3053 3054 #undef __FUNCT__ 3055 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3056 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */ 3057 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3058 { 3059 PetscErrorCode ierr; 3060 IS iscol_local; 3061 PetscInt csize; 3062 3063 PetscFunctionBegin; 3064 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3065 if (call == MAT_REUSE_MATRIX) { 3066 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3067 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3068 } else { 3069 /* check if we are grabbing all columns*/ 3070 PetscBool isstride; 3071 PetscMPIInt lisstride = 0,gisstride; 3072 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3073 if (isstride) { 3074 PetscInt start,len,mstart,mlen; 3075 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3076 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3077 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3078 if (mstart == start && mlen-mstart == len) lisstride = 1; 3079 } 3080 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3081 if (gisstride) { 3082 PetscInt N; 3083 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3084 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3085 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3086 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3087 } else { 3088 PetscInt cbs; 3089 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3090 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3091 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3092 } 3093 } 3094 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3095 if (call == MAT_INITIAL_MATRIX) { 3096 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3097 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3098 } 3099 PetscFunctionReturn(0); 3100 } 3101 3102 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3103 #undef __FUNCT__ 3104 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3105 /* 3106 Not great since it makes two copies of 
the submatrix, first an SeqAIJ 3107 in local and then by concatenating the local matrices the end result. 3108 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3109 3110 Note: This requires a sequential iscol with all indices. 3111 */ 3112 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3113 { 3114 PetscErrorCode ierr; 3115 PetscMPIInt rank,size; 3116 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3117 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3118 PetscBool allcolumns, colflag; 3119 Mat M,Mreuse; 3120 MatScalar *vwork,*aa; 3121 MPI_Comm comm; 3122 Mat_SeqAIJ *aij; 3123 3124 PetscFunctionBegin; 3125 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3126 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3127 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3128 3129 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3130 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3131 if (colflag && ncol == mat->cmap->N) { 3132 allcolumns = PETSC_TRUE; 3133 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3134 } else { 3135 allcolumns = PETSC_FALSE; 3136 } 3137 if (call == MAT_REUSE_MATRIX) { 3138 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3139 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3140 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3141 } else { 3142 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3143 } 3144 3145 /* 3146 m - number of local rows 3147 n - number of columns (same on all processors) 3148 rstart - first row in new global matrix generated 3149 */ 3150 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3151 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3152 if (call == MAT_INITIAL_MATRIX) { 3153 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3154 ii = aij->i; 3155 jj = aij->j; 3156 3157 /* 3158 Determine the number of non-zeros in the diagonal and off-diagonal 3159 portions of the matrix in order to do correct preallocation 3160 */ 3161 3162 /* first get start and end of "diagonal" columns */ 3163 if (csize == PETSC_DECIDE) { 3164 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3165 if (mglobal == n) { /* square matrix */ 3166 nlocal = m; 3167 } else { 3168 nlocal = n/size + ((n % size) > rank); 3169 } 3170 } else { 3171 nlocal = csize; 3172 } 3173 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3174 rstart = rend - nlocal; 3175 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3176 3177 /* next, compute all the lengths */ 3178 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3179 olens = dlens + m; 3180 for (i=0; i<m; i++) { 3181 jend = ii[i+1] - ii[i]; 3182 olen = 0; 3183 dlen = 0; 3184 for (j=0; j<jend; j++) { 3185 if (*jj < rstart || *jj >= rend) olen++; 3186 else dlen++; 3187 jj++; 3188 } 3189 olens[i] = olen; 3190 dlens[i] = dlen; 3191 } 3192 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3193 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3194 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3195 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3196 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 
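  /*
     Illustration of the dlens/olens split computed above (not part of the original code;
     the numbers are made up): suppose this process owns the new column range
     [rstart,rend) = [2,5) and row i of Mreuse has column indices {0,3,4,7}.
     Columns 3 and 4 lie inside [2,5) and are counted in dlens[i] (the "diagonal" block),
     while 0 and 7 are counted in olens[i] (the "off-diagonal" block), giving
     dlens[i] = 2 and olens[i] = 2 for the MatMPIAIJSetPreallocation() call above.
  */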
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3292 } 3293 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3294 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3295 3296 if (!v) { 3297 ierr = PetscFree(values);CHKERRQ(ierr); 3298 } 3299 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3300 PetscFunctionReturn(0); 3301 } 3302 3303 #undef __FUNCT__ 3304 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3305 /*@ 3306 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3307 (the default parallel PETSc format). 3308 3309 Collective on MPI_Comm 3310 3311 Input Parameters: 3312 + B - the matrix 3313 . i - the indices into j for the start of each local row (starts with zero) 3314 . j - the column indices for each local row (starts with zero) 3315 - v - optional values in the matrix 3316 3317 Level: developer 3318 3319 Notes: 3320 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3321 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3322 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3323 3324 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3325 3326 The format which is used for the sparse matrix input, is equivalent to a 3327 row-major ordering.. i.e for the following matrix, the input data expected is 3328 as shown 3329 3330 $ 1 0 0 3331 $ 2 0 3 P0 3332 $ ------- 3333 $ 4 5 6 P1 3334 $ 3335 $ Process0 [P0]: rows_owned=[0,1] 3336 $ i = {0,1,3} [size = nrow+1 = 2+1] 3337 $ j = {0,0,2} [size = 3] 3338 $ v = {1,2,3} [size = 3] 3339 $ 3340 $ Process1 [P1]: rows_owned=[2] 3341 $ i = {0,3} [size = nrow+1 = 1+1] 3342 $ j = {0,1,2} [size = 3] 3343 $ v = {4,5,6} [size = 3] 3344 3345 .keywords: matrix, aij, compressed row, sparse, parallel 3346 3347 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3348 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3349 @*/ 3350 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3351 { 3352 PetscErrorCode ierr; 3353 3354 PetscFunctionBegin; 3355 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3356 PetscFunctionReturn(0); 3357 } 3358 3359 #undef __FUNCT__ 3360 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3361 /*@C 3362 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3363 (the default parallel PETSc format). For good matrix assembly performance 3364 the user should preallocate the matrix storage by setting the parameters 3365 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3366 performance can be increased by more than a factor of 50. 3367 3368 Collective on MPI_Comm 3369 3370 Input Parameters: 3371 + B - the matrix 3372 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3373 (same value is used for all local rows) 3374 . d_nnz - array containing the number of nonzeros in the various rows of the 3375 DIAGONAL portion of the local submatrix (possibly different for each row) 3376 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3377 The size of this array is equal to the number of local rows, i.e 'm'. 
3378 For matrices that will be factored, you must leave room for (and set) 3379 the diagonal entry even if it is zero. 3380 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3381 submatrix (same value is used for all local rows). 3382 - o_nnz - array containing the number of nonzeros in the various rows of the 3383 OFF-DIAGONAL portion of the local submatrix (possibly different for 3384 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3385 structure. The size of this array is equal to the number 3386 of local rows, i.e 'm'. 3387 3388 If the *_nnz parameter is given then the *_nz parameter is ignored 3389 3390 The AIJ format (also called the Yale sparse matrix format or 3391 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3392 storage. The stored row and column indices begin with zero. 3393 See Users-Manual: ch_mat for details. 3394 3395 The parallel matrix is partitioned such that the first m0 rows belong to 3396 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3397 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3398 3399 The DIAGONAL portion of the local submatrix of a processor can be defined 3400 as the submatrix which is obtained by extraction the part corresponding to 3401 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3402 first row that belongs to the processor, r2 is the last row belonging to 3403 the this processor, and c1-c2 is range of indices of the local part of a 3404 vector suitable for applying the matrix to. This is an mxn matrix. In the 3405 common case of a square matrix, the row and column ranges are the same and 3406 the DIAGONAL part is also square. The remaining portion of the local 3407 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3408 3409 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3410 3411 You can call MatGetInfo() to get information on how effective the preallocation was; 3412 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3413 You can also run with the option -info and look for messages with the string 3414 malloc in them to see if additional memory allocation was needed. 3415 3416 Example usage: 3417 3418 Consider the following 8x8 matrix with 34 non-zero values, that is 3419 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3420 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3421 as follows: 3422 3423 .vb 3424 1 2 0 | 0 3 0 | 0 4 3425 Proc0 0 5 6 | 7 0 0 | 8 0 3426 9 0 10 | 11 0 0 | 12 0 3427 ------------------------------------- 3428 13 0 14 | 15 16 17 | 0 0 3429 Proc1 0 18 0 | 19 20 21 | 0 0 3430 0 0 0 | 22 23 0 | 24 0 3431 ------------------------------------- 3432 Proc2 25 26 27 | 0 0 28 | 29 0 3433 30 0 0 | 31 32 33 | 0 34 3434 .ve 3435 3436 This can be represented as a collection of submatrices as: 3437 3438 .vb 3439 A B C 3440 D E F 3441 G H I 3442 .ve 3443 3444 Where the submatrices A,B,C are owned by proc0, D,E,F are 3445 owned by proc1, G,H,I are owned by proc2. 3446 3447 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3448 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3449 The 'M','N' parameters are 8,8, and have the same values on all procs. 3450 3451 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3452 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3453 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
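   Once such a partitioning has been decided, the matrix is typically created and
   preallocated with a call sequence like the following (a sketch only; error checking
   and the MatSetValues() assembly loop are omitted, and d_nnz/o_nnz stand for whatever
   per-row counts the application computes):
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve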
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per row
   of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
   the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
     The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The i and j indices are 0 based, and the i indices are offsets into the local j array.
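     For the two-process example shown below, each process passes only its own
     rows; a minimal call on each process is (a sketch, error checking omitted,
     with i, j, v the per-process arrays listed below):
.vb
     on P0:  MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
     on P1:  MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve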
3534 3535 The format which is used for the sparse matrix input, is equivalent to a 3536 row-major ordering.. i.e for the following matrix, the input data expected is 3537 as shown 3538 3539 $ 1 0 0 3540 $ 2 0 3 P0 3541 $ ------- 3542 $ 4 5 6 P1 3543 $ 3544 $ Process0 [P0]: rows_owned=[0,1] 3545 $ i = {0,1,3} [size = nrow+1 = 2+1] 3546 $ j = {0,0,2} [size = 3] 3547 $ v = {1,2,3} [size = 3] 3548 $ 3549 $ Process1 [P1]: rows_owned=[2] 3550 $ i = {0,3} [size = nrow+1 = 1+1] 3551 $ j = {0,1,2} [size = 3] 3552 $ v = {4,5,6} [size = 3] 3553 3554 .keywords: matrix, aij, compressed row, sparse, parallel 3555 3556 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3557 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3558 @*/ 3559 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3560 { 3561 PetscErrorCode ierr; 3562 3563 PetscFunctionBegin; 3564 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3565 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3566 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3567 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3568 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3569 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3570 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3571 PetscFunctionReturn(0); 3572 } 3573 3574 #undef __FUNCT__ 3575 #define __FUNCT__ "MatCreateAIJ" 3576 /*@C 3577 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3578 (the default parallel PETSc format). For good matrix assembly performance 3579 the user should preallocate the matrix storage by setting the parameters 3580 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3581 performance can be increased by more than a factor of 50. 3582 3583 Collective on MPI_Comm 3584 3585 Input Parameters: 3586 + comm - MPI communicator 3587 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3588 This value should be the same as the local size used in creating the 3589 y vector for the matrix-vector product y = Ax. 3590 . n - This value should be the same as the local size used in creating the 3591 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3592 calculated if N is given) For square matrices n is almost always m. 3593 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3594 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3595 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3596 (same value is used for all local rows) 3597 . d_nnz - array containing the number of nonzeros in the various rows of the 3598 DIAGONAL portion of the local submatrix (possibly different for each row) 3599 or NULL, if d_nz is used to specify the nonzero structure. 3600 The size of this array is equal to the number of local rows, i.e 'm'. 3601 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3602 submatrix (same value is used for all local rows). 3603 - o_nnz - array containing the number of nonzeros in the various rows of the 3604 OFF-DIAGONAL portion of the local submatrix (possibly different for 3605 each row) or NULL, if o_nz is used to specify the nonzero 3606 structure. 
The size of this array is equal to the number 3607 of local rows, i.e 'm'. 3608 3609 Output Parameter: 3610 . A - the matrix 3611 3612 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3613 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3614 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3615 3616 Notes: 3617 If the *_nnz parameter is given then the *_nz parameter is ignored 3618 3619 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3620 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3621 storage requirements for this matrix. 3622 3623 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3624 processor than it must be used on all processors that share the object for 3625 that argument. 3626 3627 The user MUST specify either the local or global matrix dimensions 3628 (possibly both). 3629 3630 The parallel matrix is partitioned across processors such that the 3631 first m0 rows belong to process 0, the next m1 rows belong to 3632 process 1, the next m2 rows belong to process 2 etc.. where 3633 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3634 values corresponding to [m x N] submatrix. 3635 3636 The columns are logically partitioned with the n0 columns belonging 3637 to 0th partition, the next n1 columns belonging to the next 3638 partition etc.. where n0,n1,n2... are the input parameter 'n'. 3639 3640 The DIAGONAL portion of the local submatrix on any given processor 3641 is the submatrix corresponding to the rows and columns m,n 3642 corresponding to the given processor. i.e diagonal matrix on 3643 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3644 etc. The remaining portion of the local submatrix [m x (N-n)] 3645 constitute the OFF-DIAGONAL portion. The example below better 3646 illustrates this concept. 3647 3648 For a square global matrix we define each processor's diagonal portion 3649 to be its local rows and the corresponding columns (a square submatrix); 3650 each processor's off-diagonal portion encompasses the remainder of the 3651 local matrix (a rectangular submatrix). 3652 3653 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3654 3655 When calling this routine with a single process communicator, a matrix of 3656 type SEQAIJ is returned. If a matrix of type MATMPIAIJ is desired for this 3657 type of communicator, use the construction mechanism: 3658 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3659 3660 By default, this format uses inodes (identical nodes) when possible. 3661 We search for consecutive rows with the same nonzero structure, thereby 3662 reusing matrix information to achieve increased efficiency. 3663 3664 Options Database Keys: 3665 + -mat_no_inode - Do not use inodes 3666 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3667 - -mat_aij_oneindex - Internally use indexing starting at 1 3668 rather than 0. Note that when calling MatSetValues(), 3669 the user still MUST index entries starting at 0! 3670 3671 3672 Example usage: 3673 3674 Consider the following 8x8 matrix with 34 non-zero values, that is 3675 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3676 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per row
   of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
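   For the example above, the call on process 0 would therefore be (a sketch,
   error checking omitted; processes 1 and 2 make the matching collective call
   with their own local sizes and nnz arrays):
.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve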
3740 3741 Level: intermediate 3742 3743 .keywords: matrix, aij, compressed row, sparse, parallel 3744 3745 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3746 MATMPIAIJ, MatCreateMPIAIJWithArrays() 3747 @*/ 3748 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3749 { 3750 PetscErrorCode ierr; 3751 PetscMPIInt size; 3752 3753 PetscFunctionBegin; 3754 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3755 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3756 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3757 if (size > 1) { 3758 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3759 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3760 } else { 3761 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3762 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3763 } 3764 PetscFunctionReturn(0); 3765 } 3766 3767 #undef __FUNCT__ 3768 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3769 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3770 { 3771 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3772 PetscBool flg; 3773 PetscErrorCode ierr; 3774 3775 PetscFunctionBegin; 3776 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 3777 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 3778 if (Ad) *Ad = a->A; 3779 if (Ao) *Ao = a->B; 3780 if (colmap) *colmap = a->garray; 3781 PetscFunctionReturn(0); 3782 } 3783 3784 #undef __FUNCT__ 3785 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3786 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3787 { 3788 PetscErrorCode ierr; 3789 PetscInt m,N,i,rstart,nnz,Ii; 3790 PetscInt *indx; 3791 PetscScalar *values; 3792 3793 PetscFunctionBegin; 3794 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3795 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3796 PetscInt *dnz,*onz,sum,bs,cbs; 3797 3798 if (n == PETSC_DECIDE) { 3799 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3800 } 3801 /* Check sum(n) = N */ 3802 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3803 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3804 3805 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3806 rstart -= m; 3807 3808 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3809 for (i=0; i<m; i++) { 3810 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3811 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3812 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3813 } 3814 3815 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3816 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3817 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3818 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3819 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3820 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3821 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3822 } 3823 3824 /* numeric phase */ 3825 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3826 for (i=0; i<m; i++) { 3827 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3828 Ii = i + rstart; 3829 
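    /* local row i of inmat becomes global row rstart+i of *outmat; since inmat
       already has the full global number of columns, its column indices can be
       passed to MatSetValues() unchanged */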
ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3830 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3831 } 3832 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3833 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3834 PetscFunctionReturn(0); 3835 } 3836 3837 #undef __FUNCT__ 3838 #define __FUNCT__ "MatFileSplit" 3839 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3840 { 3841 PetscErrorCode ierr; 3842 PetscMPIInt rank; 3843 PetscInt m,N,i,rstart,nnz; 3844 size_t len; 3845 const PetscInt *indx; 3846 PetscViewer out; 3847 char *name; 3848 Mat B; 3849 const PetscScalar *values; 3850 3851 PetscFunctionBegin; 3852 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3853 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3854 /* Should this be the type of the diagonal block of A? */ 3855 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3856 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3857 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3858 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3859 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3860 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3861 for (i=0; i<m; i++) { 3862 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3863 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3864 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3865 } 3866 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3867 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3868 3869 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3870 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3871 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3872 sprintf(name,"%s.%d",outfile,rank); 3873 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3874 ierr = PetscFree(name);CHKERRQ(ierr); 3875 ierr = MatView(B,out);CHKERRQ(ierr); 3876 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3877 ierr = MatDestroy(&B);CHKERRQ(ierr); 3878 PetscFunctionReturn(0); 3879 } 3880 3881 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3882 #undef __FUNCT__ 3883 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3884 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3885 { 3886 PetscErrorCode ierr; 3887 Mat_Merge_SeqsToMPI *merge; 3888 PetscContainer container; 3889 3890 PetscFunctionBegin; 3891 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3892 if (container) { 3893 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3894 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3895 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3896 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3897 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3898 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3899 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3900 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3901 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3902 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3903 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3904 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3905 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3906 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3907 ierr = PetscFree(merge);CHKERRQ(ierr); 3908 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3909 } 3910 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3911 PetscFunctionReturn(0); 3912 } 3913 3914 
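/*
   Usage sketch for the MatCreateMPIAIJSumSeqAIJ() machinery implemented below
   (illustrative only, error checking omitted). Every process contributes a
   sequential AIJ matrix with the same global dimensions, and the contributions
   are summed into a single MATMPIAIJ matrix:

      Mat seqmat,mpimat;
      ... each process assembles its own MATSEQAIJ matrix seqmat of size M x N ...
      MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
      ... update the numerical values of seqmat, keeping its nonzero pattern ...
      MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
*/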
#include <../src/mat/utils/freespace.h> 3915 #include <petscbt.h> 3916 3917 #undef __FUNCT__ 3918 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3919 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3920 { 3921 PetscErrorCode ierr; 3922 MPI_Comm comm; 3923 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3924 PetscMPIInt size,rank,taga,*len_s; 3925 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3926 PetscInt proc,m; 3927 PetscInt **buf_ri,**buf_rj; 3928 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3929 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3930 MPI_Request *s_waits,*r_waits; 3931 MPI_Status *status; 3932 MatScalar *aa=a->a; 3933 MatScalar **abuf_r,*ba_i; 3934 Mat_Merge_SeqsToMPI *merge; 3935 PetscContainer container; 3936 3937 PetscFunctionBegin; 3938 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3939 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3940 3941 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3942 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3943 3944 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3945 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3946 3947 bi = merge->bi; 3948 bj = merge->bj; 3949 buf_ri = merge->buf_ri; 3950 buf_rj = merge->buf_rj; 3951 3952 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 3953 owners = merge->rowmap->range; 3954 len_s = merge->len_s; 3955 3956 /* send and recv matrix values */ 3957 /*-----------------------------*/ 3958 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3959 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3960 3961 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 3962 for (proc=0,k=0; proc<size; proc++) { 3963 if (!len_s[proc]) continue; 3964 i = owners[proc]; 3965 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3966 k++; 3967 } 3968 3969 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3970 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3971 ierr = PetscFree(status);CHKERRQ(ierr); 3972 3973 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3974 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3975 3976 /* insert mat values of mpimat */ 3977 /*----------------------------*/ 3978 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 3979 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 3980 3981 for (k=0; k<merge->nrecv; k++) { 3982 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3983 nrows = *(buf_ri_k[k]); 3984 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3985 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 3986 } 3987 3988 /* set values of ba */ 3989 m = merge->rowmap->n; 3990 for (i=0; i<m; i++) { 3991 arow = owners[rank] + i; 3992 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3993 bnzi = bi[i+1] - bi[i]; 3994 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 3995 3996 /* add local non-zero vals of this proc's seqmat into ba */ 3997 anzi = ai[arow+1] - ai[arow]; 3998 aj = a->j + ai[arow]; 3999 aa = a->a + ai[arow]; 4000 nextaj = 0; 4001 for (j=0; nextaj<anzi; j++) { 4002 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4003 ba_i[j] += aa[nextaj++]; 4004 } 4005 } 4006 4007 /* add received 
vals into ba */ 4008 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4009 /* i-th row */ 4010 if (i == *nextrow[k]) { 4011 anzi = *(nextai[k]+1) - *nextai[k]; 4012 aj = buf_rj[k] + *(nextai[k]); 4013 aa = abuf_r[k] + *(nextai[k]); 4014 nextaj = 0; 4015 for (j=0; nextaj<anzi; j++) { 4016 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4017 ba_i[j] += aa[nextaj++]; 4018 } 4019 } 4020 nextrow[k]++; nextai[k]++; 4021 } 4022 } 4023 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4024 } 4025 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4026 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4027 4028 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4029 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4030 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4031 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4032 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4033 PetscFunctionReturn(0); 4034 } 4035 4036 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4037 4038 #undef __FUNCT__ 4039 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4040 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4041 { 4042 PetscErrorCode ierr; 4043 Mat B_mpi; 4044 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4045 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4046 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4047 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4048 PetscInt len,proc,*dnz,*onz,bs,cbs; 4049 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4050 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4051 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4052 MPI_Status *status; 4053 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4054 PetscBT lnkbt; 4055 Mat_Merge_SeqsToMPI *merge; 4056 PetscContainer container; 4057 4058 PetscFunctionBegin; 4059 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4060 4061 /* make sure it is a PETSc comm */ 4062 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4063 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4064 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4065 4066 ierr = PetscNew(&merge);CHKERRQ(ierr); 4067 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4068 4069 /* determine row ownership */ 4070 /*---------------------------------------------------------*/ 4071 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4072 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4073 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4074 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4075 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4076 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4077 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4078 4079 m = merge->rowmap->n; 4080 owners = merge->rowmap->range; 4081 4082 /* determine the number of messages to send, their lengths */ 4083 /*---------------------------------------------------------*/ 4084 len_s = merge->len_s; 4085 4086 len = 0; /* length of buf_si[] */ 4087 merge->nsend = 0; 4088 for (proc=0; proc<size; proc++) { 4089 len_si[proc] = 0; 4090 if (proc == rank) { 4091 len_s[proc] = 0; 4092 } else { 4093 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4094 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4095 } 4096 if (len_s[proc]) { 4097 merge->nsend++; 4098 nrows = 0; 4099 for 
(i=owners[proc]; i<owners[proc+1]; i++) { 4100 if (ai[i+1] > ai[i]) nrows++; 4101 } 4102 len_si[proc] = 2*(nrows+1); 4103 len += len_si[proc]; 4104 } 4105 } 4106 4107 /* determine the number and length of messages to receive for ij-structure */ 4108 /*-------------------------------------------------------------------------*/ 4109 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4110 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4111 4112 /* post the Irecv of j-structure */ 4113 /*-------------------------------*/ 4114 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4115 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4116 4117 /* post the Isend of j-structure */ 4118 /*--------------------------------*/ 4119 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4120 4121 for (proc=0, k=0; proc<size; proc++) { 4122 if (!len_s[proc]) continue; 4123 i = owners[proc]; 4124 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4125 k++; 4126 } 4127 4128 /* receives and sends of j-structure are complete */ 4129 /*------------------------------------------------*/ 4130 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4131 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4132 4133 /* send and recv i-structure */ 4134 /*---------------------------*/ 4135 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4136 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4137 4138 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4139 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4140 for (proc=0,k=0; proc<size; proc++) { 4141 if (!len_s[proc]) continue; 4142 /* form outgoing message for i-structure: 4143 buf_si[0]: nrows to be sent 4144 [1:nrows]: row index (global) 4145 [nrows+1:2*nrows+1]: i-structure index 4146 */ 4147 /*-------------------------------------------*/ 4148 nrows = len_si[proc]/2 - 1; 4149 buf_si_i = buf_si + nrows+1; 4150 buf_si[0] = nrows; 4151 buf_si_i[0] = 0; 4152 nrows = 0; 4153 for (i=owners[proc]; i<owners[proc+1]; i++) { 4154 anzi = ai[i+1] - ai[i]; 4155 if (anzi) { 4156 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4157 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4158 nrows++; 4159 } 4160 } 4161 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4162 k++; 4163 buf_si += len_si[proc]; 4164 } 4165 4166 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4167 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4168 4169 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4170 for (i=0; i<merge->nrecv; i++) { 4171 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4172 } 4173 4174 ierr = PetscFree(len_si);CHKERRQ(ierr); 4175 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4176 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4177 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4178 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4179 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4180 ierr = PetscFree(status);CHKERRQ(ierr); 4181 4182 /* compute a local seq matrix in each processor */ 4183 
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining < bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready
for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4268 B_mpi->assembled = PETSC_FALSE; 4269 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4270 merge->bi = bi; 4271 merge->bj = bj; 4272 merge->buf_ri = buf_ri; 4273 merge->buf_rj = buf_rj; 4274 merge->coi = NULL; 4275 merge->coj = NULL; 4276 merge->owners_co = NULL; 4277 4278 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4279 4280 /* attach the supporting struct to B_mpi for reuse */ 4281 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4282 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4283 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4284 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4285 *mpimat = B_mpi; 4286 4287 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4288 PetscFunctionReturn(0); 4289 } 4290 4291 #undef __FUNCT__ 4292 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4293 /*@C 4294 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4295 matrices from each processor 4296 4297 Collective on MPI_Comm 4298 4299 Input Parameters: 4300 + comm - the communicators the parallel matrix will live on 4301 . seqmat - the input sequential matrices 4302 . m - number of local rows (or PETSC_DECIDE) 4303 . n - number of local columns (or PETSC_DECIDE) 4304 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4305 4306 Output Parameter: 4307 . mpimat - the parallel matrix generated 4308 4309 Level: advanced 4310 4311 Notes: 4312 The dimensions of the sequential matrix in each processor MUST be the same. 4313 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4314 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4315 @*/ 4316 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4317 { 4318 PetscErrorCode ierr; 4319 PetscMPIInt size; 4320 4321 PetscFunctionBegin; 4322 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4323 if (size == 1) { 4324 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4325 if (scall == MAT_INITIAL_MATRIX) { 4326 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4327 } else { 4328 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4329 } 4330 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4331 PetscFunctionReturn(0); 4332 } 4333 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4334 if (scall == MAT_INITIAL_MATRIX) { 4335 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4336 } 4337 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4338 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4339 PetscFunctionReturn(0); 4340 } 4341 4342 #undef __FUNCT__ 4343 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4344 /*@ 4345 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4346 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4347 with MatGetSize() 4348 4349 Not Collective 4350 4351 Input Parameters: 4352 + A - the matrix 4353 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4354 4355 Output Parameter: 4356 . 
A_loc - the local sequential matrix generated 4357 4358 Level: developer 4359 4360 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4361 4362 @*/ 4363 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4364 { 4365 PetscErrorCode ierr; 4366 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4367 Mat_SeqAIJ *mat,*a,*b; 4368 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4369 MatScalar *aa,*ba,*cam; 4370 PetscScalar *ca; 4371 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4372 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4373 PetscBool match; 4374 MPI_Comm comm; 4375 PetscMPIInt size; 4376 4377 PetscFunctionBegin; 4378 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4379 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4380 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4381 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4382 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4383 4384 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4385 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4386 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4387 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4388 aa = a->a; ba = b->a; 4389 if (scall == MAT_INITIAL_MATRIX) { 4390 if (size == 1) { 4391 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4392 PetscFunctionReturn(0); 4393 } 4394 4395 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4396 ci[0] = 0; 4397 for (i=0; i<am; i++) { 4398 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4399 } 4400 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4401 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4402 k = 0; 4403 for (i=0; i<am; i++) { 4404 ncols_o = bi[i+1] - bi[i]; 4405 ncols_d = ai[i+1] - ai[i]; 4406 /* off-diagonal portion of A */ 4407 for (jo=0; jo<ncols_o; jo++) { 4408 col = cmap[*bj]; 4409 if (col >= cstart) break; 4410 cj[k] = col; bj++; 4411 ca[k++] = *ba++; 4412 } 4413 /* diagonal portion of A */ 4414 for (j=0; j<ncols_d; j++) { 4415 cj[k] = cstart + *aj++; 4416 ca[k++] = *aa++; 4417 } 4418 /* off-diagonal portion of A */ 4419 for (j=jo; j<ncols_o; j++) { 4420 cj[k] = cmap[*bj++]; 4421 ca[k++] = *ba++; 4422 } 4423 } 4424 /* put together the new matrix */ 4425 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4426 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4427 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4428 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4429 mat->free_a = PETSC_TRUE; 4430 mat->free_ij = PETSC_TRUE; 4431 mat->nonew = 0; 4432 } else if (scall == MAT_REUSE_MATRIX) { 4433 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4434 ci = mat->i; cj = mat->j; cam = mat->a; 4435 for (i=0; i<am; i++) { 4436 /* off-diagonal portion of A */ 4437 ncols_o = bi[i+1] - bi[i]; 4438 for (jo=0; jo<ncols_o; jo++) { 4439 col = cmap[*bj]; 4440 if (col >= cstart) break; 4441 *cam++ = *ba++; bj++; 4442 } 4443 /* diagonal portion of A */ 4444 ncols_d = ai[i+1] - ai[i]; 4445 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4446 /* off-diagonal portion of A */ 4447 for (j=jo; j<ncols_o; j++) { 4448 *cam++ = *ba++; bj++; 4449 } 4450 } 4451 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4452 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4453 PetscFunctionReturn(0); 4454 } 4455 4456 #undef __FUNCT__ 4457 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4458 /*@C 4459 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4460 4461 Not Collective 4462 4463 Input Parameters: 4464 + A - the matrix 4465 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4466 - row, col - index sets of rows and columns to extract (or NULL) 4467 4468 Output Parameter: 4469 . A_loc - the local sequential matrix generated 4470 4471 Level: developer 4472 4473 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4474 4475 @*/ 4476 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4477 { 4478 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4479 PetscErrorCode ierr; 4480 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4481 IS isrowa,iscola; 4482 Mat *aloc; 4483 PetscBool match; 4484 4485 PetscFunctionBegin; 4486 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4487 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4488 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4489 if (!row) { 4490 start = A->rmap->rstart; end = A->rmap->rend; 4491 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4492 } else { 4493 isrowa = *row; 4494 } 4495 if (!col) { 4496 start = A->cmap->rstart; 4497 cmap = a->garray; 4498 nzA = a->A->cmap->n; 4499 nzB = a->B->cmap->n; 4500 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4501 ncols = 0; 4502 for (i=0; i<nzB; i++) { 4503 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4504 else break; 4505 } 4506 imark = i; 4507 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4508 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4509 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4510 } else { 4511 iscola = *col; 4512 } 4513 if (scall != MAT_INITIAL_MATRIX) { 4514 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4515 aloc[0] = *A_loc; 4516 } 4517 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4518 *A_loc = aloc[0]; 4519 ierr = PetscFree(aloc);CHKERRQ(ierr); 4520 if (!row) { 4521 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4522 } 4523 if (!col) { 4524 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4525 } 4526 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4527 PetscFunctionReturn(0); 4528 } 4529 4530 #undef __FUNCT__ 4531 #define __FUNCT__ "MatGetBrowsOfAcols" 4532 /*@C 4533 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to 
nonzero columns of local A 4534 4535 Collective on Mat 4536 4537 Input Parameters: 4538 + A,B - the matrices in mpiaij format 4539 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4540 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4541 4542 Output Parameter: 4543 + rowb, colb - index sets of rows and columns of B to extract 4544 - B_seq - the sequential matrix generated 4545 4546 Level: developer 4547 4548 @*/ 4549 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4550 { 4551 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4552 PetscErrorCode ierr; 4553 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4554 IS isrowb,iscolb; 4555 Mat *bseq=NULL; 4556 4557 PetscFunctionBegin; 4558 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4559 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4560 } 4561 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4562 4563 if (scall == MAT_INITIAL_MATRIX) { 4564 start = A->cmap->rstart; 4565 cmap = a->garray; 4566 nzA = a->A->cmap->n; 4567 nzB = a->B->cmap->n; 4568 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4569 ncols = 0; 4570 for (i=0; i<nzB; i++) { /* row < local row index */ 4571 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4572 else break; 4573 } 4574 imark = i; 4575 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4576 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4577 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4578 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4579 } else { 4580 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4581 isrowb = *rowb; iscolb = *colb; 4582 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 4583 bseq[0] = *B_seq; 4584 } 4585 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4586 *B_seq = bseq[0]; 4587 ierr = PetscFree(bseq);CHKERRQ(ierr); 4588 if (!rowb) { 4589 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4590 } else { 4591 *rowb = isrowb; 4592 } 4593 if (!colb) { 4594 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4595 } else { 4596 *colb = iscolb; 4597 } 4598 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4599 PetscFunctionReturn(0); 4600 } 4601 4602 #undef __FUNCT__ 4603 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4604 /* 4605 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4606 of the OFF-DIAGONAL portion of local A 4607 4608 Collective on Mat 4609 4610 Input Parameters: 4611 + A,B - the matrices in mpiaij format 4612 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4613 4614 Output Parameter: 4615 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4616 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4617 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4618 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4619 4620 Level: developer 4621 4622 */ 4623 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4624 { 4625 VecScatter_MPI_General *gen_to,*gen_from; 4626 PetscErrorCode ierr; 4627 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4628 Mat_SeqAIJ *b_oth; 4629 VecScatter ctx =a->Mvctx; 4630 MPI_Comm comm; 4631 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4632 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4633 PetscScalar *rvalues,*svalues; 4634 MatScalar *b_otha,*bufa,*bufA; 4635 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4636 MPI_Request *rwaits = NULL,*swaits = NULL; 4637 MPI_Status *sstatus,rstatus; 4638 PetscMPIInt jj,size; 4639 PetscInt *cols,sbs,rbs; 4640 PetscScalar *vals; 4641 4642 PetscFunctionBegin; 4643 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4644 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4645 4646 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4647 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4648 } 4649 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4650 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4651 4652 gen_to = (VecScatter_MPI_General*)ctx->todata; 4653 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4654 rvalues = gen_from->values; /* holds the length of receiving row */ 4655 svalues = gen_to->values; /* holds the length of sending row */ 4656 nrecvs = gen_from->n; 4657 nsends = gen_to->n; 4658 4659 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4660 srow = gen_to->indices; /* local row index to be sent */ 4661 sstarts = gen_to->starts; 4662 sprocs = gen_to->procs; 4663 sstatus = gen_to->sstatus; 4664 sbs = gen_to->bs; 4665 rstarts = gen_from->starts; 4666 rprocs = gen_from->procs; 4667 rbs = gen_from->bs; 4668 4669 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4670 if (scall == MAT_INITIAL_MATRIX) { 4671 /* i-array */ 4672 /*---------*/ 4673 /* post receives */ 4674 for (i=0; i<nrecvs; i++) { 4675 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4676 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4677 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4678 } 4679 4680 /* pack the outgoing message */ 4681 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4682 4683 sstartsj[0] = 0; 4684 rstartsj[0] = 0; 4685 len = 0; /* total length of j or a array to be sent */ 4686 k = 0; 4687 for (i=0; i<nsends; i++) { 4688 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4689 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4690 for (j=0; j<nrows; j++) { 4691 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4692 for (l=0; l<sbs; l++) { 4693 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4694 4695 rowlen[j*sbs+l] = ncols; 4696 4697 len += ncols; 4698 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4699 } 4700 k++; 4701 } 4702 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4703 4704 sstartsj[i+1] = len; /* starting point of 
(i+1)-th outgoing msg in bufj and bufa */ 4705 } 4706 /* recvs and sends of i-array are completed */ 4707 i = nrecvs; 4708 while (i--) { 4709 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4710 } 4711 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4712 4713 /* allocate buffers for sending j and a arrays */ 4714 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 4715 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 4716 4717 /* create i-array of B_oth */ 4718 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 4719 4720 b_othi[0] = 0; 4721 len = 0; /* total length of j or a array to be received */ 4722 k = 0; 4723 for (i=0; i<nrecvs; i++) { 4724 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4725 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4726 for (j=0; j<nrows; j++) { 4727 b_othi[k+1] = b_othi[k] + rowlen[j]; 4728 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 4729 k++; 4730 } 4731 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4732 } 4733 4734 /* allocate space for j and a arrrays of B_oth */ 4735 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 4736 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 4737 4738 /* j-array */ 4739 /*---------*/ 4740 /* post receives of j-array */ 4741 for (i=0; i<nrecvs; i++) { 4742 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4743 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4744 } 4745 4746 /* pack the outgoing message j-array */ 4747 k = 0; 4748 for (i=0; i<nsends; i++) { 4749 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4750 bufJ = bufj+sstartsj[i]; 4751 for (j=0; j<nrows; j++) { 4752 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4753 for (ll=0; ll<sbs; ll++) { 4754 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4755 for (l=0; l<ncols; l++) { 4756 *bufJ++ = cols[l]; 4757 } 4758 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4759 } 4760 } 4761 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4762 } 4763 4764 /* recvs and sends of j-array are completed */ 4765 i = nrecvs; 4766 while (i--) { 4767 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4768 } 4769 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4770 } else if (scall == MAT_REUSE_MATRIX) { 4771 sstartsj = *startsj_s; 4772 rstartsj = *startsj_r; 4773 bufa = *bufa_ptr; 4774 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4775 b_otha = b_oth->a; 4776 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4777 4778 /* a-array */ 4779 /*---------*/ 4780 /* post receives of a-array */ 4781 for (i=0; i<nrecvs; i++) { 4782 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4783 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4784 } 4785 4786 /* pack the outgoing message a-array */ 4787 k = 0; 4788 for (i=0; i<nsends; i++) { 4789 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4790 bufA = bufa+sstartsj[i]; 4791 for (j=0; j<nrows; j++) { 4792 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4793 for (ll=0; ll<sbs; ll++) { 4794 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4795 for (l=0; l<ncols; l++) { 4796 *bufA++ = vals[l]; 4797 } 4798 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4799 } 4800 } 4801 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4802 } 4803 /* recvs and sends of a-array are completed */ 4804 i = nrecvs; 4805 while (i--) { 4806 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4807 } 4808 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4809 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4810 4811 if (scall == MAT_INITIAL_MATRIX) { 4812 /* put together the new matrix */ 4813 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4814 4815 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4816 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4817 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4818 b_oth->free_a = PETSC_TRUE; 4819 b_oth->free_ij = PETSC_TRUE; 4820 b_oth->nonew = 0; 4821 4822 ierr = PetscFree(bufj);CHKERRQ(ierr); 4823 if (!startsj_s || !bufa_ptr) { 4824 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4825 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4826 } else { 4827 *startsj_s = sstartsj; 4828 *startsj_r = rstartsj; 4829 *bufa_ptr = bufa; 4830 } 4831 } 4832 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4833 PetscFunctionReturn(0); 4834 } 4835 4836 #undef __FUNCT__ 4837 #define __FUNCT__ "MatGetCommunicationStructs" 4838 /*@C 4839 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4840 4841 Not Collective 4842 4843 Input Parameters: 4844 . A - The matrix in mpiaij format 4845 4846 Output Parameter: 4847 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4848 . 
colmap - A map from global column index to local index into lvec 4849 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4850 4851 Level: developer 4852 4853 @*/ 4854 #if defined(PETSC_USE_CTABLE) 4855 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4856 #else 4857 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4858 #endif 4859 { 4860 Mat_MPIAIJ *a; 4861 4862 PetscFunctionBegin; 4863 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4864 PetscValidPointer(lvec, 2); 4865 PetscValidPointer(colmap, 3); 4866 PetscValidPointer(multScatter, 4); 4867 a = (Mat_MPIAIJ*) A->data; 4868 if (lvec) *lvec = a->lvec; 4869 if (colmap) *colmap = a->colmap; 4870 if (multScatter) *multScatter = a->Mvctx; 4871 PetscFunctionReturn(0); 4872 } 4873 4874 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4875 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4876 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4877 #if defined(PETSC_HAVE_ELEMENTAL) 4878 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4879 #endif 4880 4881 #undef __FUNCT__ 4882 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4883 /* 4884 Computes (B'*A')' since computing B*A directly is untenable 4885 4886 n p p 4887 ( ) ( ) ( ) 4888 m ( A ) * n ( B ) = m ( C ) 4889 ( ) ( ) ( ) 4890 4891 */ 4892 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4893 { 4894 PetscErrorCode ierr; 4895 Mat At,Bt,Ct; 4896 4897 PetscFunctionBegin; 4898 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4899 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4900 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4901 ierr = MatDestroy(&At);CHKERRQ(ierr); 4902 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4903 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4904 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4905 PetscFunctionReturn(0); 4906 } 4907 4908 #undef __FUNCT__ 4909 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4910 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4911 { 4912 PetscErrorCode ierr; 4913 PetscInt m=A->rmap->n,n=B->cmap->n; 4914 Mat Cmat; 4915 4916 PetscFunctionBegin; 4917 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4918 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 4919 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4920 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 4921 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4922 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 4923 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4924 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4925 4926 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 4927 4928 *C = Cmat; 4929 PetscFunctionReturn(0); 4930 } 4931 4932 /* ----------------------------------------------------------------*/ 4933 #undef __FUNCT__ 4934 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4935 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4936 { 4937 PetscErrorCode ierr; 4938 4939 PetscFunctionBegin; 4940 if (scall == MAT_INITIAL_MATRIX) { 4941 ierr = 
PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4942 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4943 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4944 } 4945 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4946 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4947 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4948 PetscFunctionReturn(0); 4949 } 4950 4951 /*MC 4952 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4953 4954 Options Database Keys: 4955 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4956 4957 Level: beginner 4958 4959 .seealso: MatCreateAIJ() 4960 M*/ 4961 4962 #undef __FUNCT__ 4963 #define __FUNCT__ "MatCreate_MPIAIJ" 4964 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 4965 { 4966 Mat_MPIAIJ *b; 4967 PetscErrorCode ierr; 4968 PetscMPIInt size; 4969 4970 PetscFunctionBegin; 4971 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 4972 4973 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 4974 B->data = (void*)b; 4975 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4976 B->assembled = PETSC_FALSE; 4977 B->insertmode = NOT_SET_VALUES; 4978 b->size = size; 4979 4980 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 4981 4982 /* build cache for off array entries formed */ 4983 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 4984 4985 b->donotstash = PETSC_FALSE; 4986 b->colmap = 0; 4987 b->garray = 0; 4988 b->roworiented = PETSC_TRUE; 4989 4990 /* stuff used for matrix vector multiply */ 4991 b->lvec = NULL; 4992 b->Mvctx = NULL; 4993 4994 /* stuff for MatGetRow() */ 4995 b->rowindices = 0; 4996 b->rowvalues = 0; 4997 b->getrowactive = PETSC_FALSE; 4998 4999 /* flexible pointer used in CUSP/CUSPARSE classes */ 5000 b->spptr = NULL; 5001 5002 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5003 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5004 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5005 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5006 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5007 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5008 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5009 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5010 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5011 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5012 #if defined(PETSC_HAVE_ELEMENTAL) 5013 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5014 #endif 5015 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5016 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5017 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5018 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5019 PetscFunctionReturn(0);
5020 }
5021
5022 #undef __FUNCT__
5023 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5024 /*@C
5025 MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5026 and "off-diagonal" part of the matrix in CSR format.
5027
5028 Collective on MPI_Comm
5029
5030 Input Parameters:
5031 + comm - MPI communicator
5032 . m - number of local rows (cannot be PETSC_DECIDE)
5033 . n - number of local columns; this should be the same as the local size used in creating the
5034 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5035 calculated if N is given). For square matrices n is almost always m.
5036 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5037 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5038 . i - row indices for "diagonal" portion of matrix
5039 . j - column indices
5040 . a - matrix values
5041 . oi - row indices for "off-diagonal" portion of matrix
5042 . oj - column indices
5043 - oa - matrix values
5044
5045 Output Parameter:
5046 . mat - the matrix
5047
5048 Level: advanced
5049
5050 Notes:
5051 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5052 must free the arrays once the matrix has been destroyed and not before.
5053
5054 The i and j indices are 0 based.
5055
5056 See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.
5057
5058 This sets local rows and cannot be used to set off-processor values.
5059
5060 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5061 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5062 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5063 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5064 keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5065 communication if it is known that only local entries will be set.
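   Example:
   A minimal two-process sketch (the matrix, its distribution, and the variables rank and ierr are assumed
   and chosen purely for illustration). The 2x2 matrix with rows (1 2) and (3 4) is laid out with one row
   and one column per process, so each process owns one diagonal and one off-diagonal entry. Following the
   calls to MatCreateSeqAIJWithArrays() in the implementation below, the column indices in j are local to
   the "diagonal" block while the column indices in oj are global.
.vb
   Mat         A;
   PetscInt    i[2]  = {0,1}, j[1]  = {0};
   PetscInt    oi[2] = {0,1}, oj[1];
   PetscScalar a[1],oa[1];

   if (!rank) { a[0] = 1.0; oj[0] = 1; oa[0] = 2.0; }
   else       { a[0] = 4.0; oj[0] = 0; oa[0] = 3.0; }
   ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
   The arrays must remain valid until A has been destroyed, as noted above.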
5066 5067 .keywords: matrix, aij, compressed row, sparse, parallel 5068 5069 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5070 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5071 @*/ 5072 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5073 { 5074 PetscErrorCode ierr; 5075 Mat_MPIAIJ *maij; 5076 5077 PetscFunctionBegin; 5078 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5079 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5080 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5081 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5082 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5083 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5084 maij = (Mat_MPIAIJ*) (*mat)->data; 5085 5086 (*mat)->preallocated = PETSC_TRUE; 5087 5088 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5089 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5090 5091 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5092 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5093 5094 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5095 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5096 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5097 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5098 5099 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5100 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5101 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5102 PetscFunctionReturn(0); 5103 } 5104 5105 /* 5106 Special version for direct calls from Fortran 5107 */ 5108 #include <petsc/private/fortranimpl.h> 5109 5110 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5111 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5112 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5113 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5114 #endif 5115 5116 /* Change these macros so can be used in void function */ 5117 #undef CHKERRQ 5118 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5119 #undef SETERRQ2 5120 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5121 #undef SETERRQ3 5122 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5123 #undef SETERRQ 5124 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5125 5126 #undef __FUNCT__ 5127 #define __FUNCT__ "matsetvaluesmpiaij_" 5128 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5129 { 5130 Mat mat = *mmat; 5131 PetscInt m = *mm, n = *mn; 5132 InsertMode addv = *maddv; 5133 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5134 PetscScalar value; 5135 PetscErrorCode ierr; 5136 5137 MatCheckPreallocated(mat,1); 5138 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5139 5140 #if defined(PETSC_USE_DEBUG) 5141 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5142 #endif 5143 { 5144 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5145 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5146 PetscBool roworiented = aij->roworiented; 5147 5148 /* Some Variables required in the macro */ 5149 Mat A = aij->A; 5150 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5151 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5152 MatScalar *aa = a->a; 5153 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5154 Mat B = aij->B; 5155 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5156 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5157 MatScalar *ba = b->a; 5158 5159 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5160 PetscInt nonew = a->nonew; 5161 MatScalar *ap1,*ap2; 5162 5163 PetscFunctionBegin; 5164 for (i=0; i<m; i++) { 5165 if (im[i] < 0) continue; 5166 #if defined(PETSC_USE_DEBUG) 5167 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5168 #endif 5169 if (im[i] >= rstart && im[i] < rend) { 5170 row = im[i] - rstart; 5171 lastcol1 = -1; 5172 rp1 = aj + ai[row]; 5173 ap1 = aa + ai[row]; 5174 rmax1 = aimax[row]; 5175 nrow1 = ailen[row]; 5176 low1 = 0; 5177 high1 = nrow1; 5178 lastcol2 = -1; 5179 rp2 = bj + bi[row]; 5180 ap2 = ba + bi[row]; 5181 rmax2 = bimax[row]; 5182 nrow2 = bilen[row]; 5183 low2 = 0; 5184 high2 = nrow2; 5185 5186 for (j=0; j<n; j++) { 5187 if (roworiented) value = v[i*n+j]; 5188 else value = v[i+j*m]; 5189 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5190 if (in[j] >= cstart && in[j] < cend) { 5191 col = in[j] - cstart; 5192 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5193 } else if (in[j] < 0) continue; 5194 #if defined(PETSC_USE_DEBUG) 5195 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5196 #endif 5197 else { 5198 if (mat->was_assembled) { 5199 if (!aij->colmap) { 5200 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5201 } 5202 #if defined(PETSC_USE_CTABLE) 5203 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5204 col--; 5205 #else 5206 col = aij->colmap[in[j]] - 1; 5207 #endif 5208 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5209 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5210 col = in[j]; 5211 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5212 B = aij->B; 5213 b = (Mat_SeqAIJ*)B->data; 5214 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5215 rp2 = bj + bi[row]; 5216 ap2 = ba + bi[row]; 5217 rmax2 = bimax[row]; 5218 nrow2 = bilen[row]; 5219 low2 = 0; 5220 high2 = nrow2; 5221 bm = aij->B->rmap->n; 5222 ba = b->a; 5223 } 5224 } else col = in[j]; 5225 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5226 } 5227 } 5228 } else if (!aij->donotstash) { 5229 if (roworiented) { 5230 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5231 } else { 5232 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5233 } 5234 } 5235 } 5236 } 5237 PetscFunctionReturnVoid(); 5238 } 5239 5240
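/*
   Usage sketch for MatCreateMPIAIJSumSeqAIJ() defined above (illustrative only; the helper name, sizes, and
   values are made up and are not part of PETSc or its test suite). Each process assembles a full-size
   sequential AIJ matrix holding its own contribution and the routine sums the per-process matrices into one
   parallel MATMPIAIJ matrix; here every process adds 1.0 at entry (0,0), so the parallel matrix ends up with
   the number of processes there. See the Notes of MatCreateMPIAIJSumSeqAIJ() for the ownership of seqmat;
   with MAT_REUSE_MATRIX the numeric phase can be repeated after refilling seqmat on the same nonzero pattern.

     PetscErrorCode BuildSummedMatrix(MPI_Comm comm,PetscInt M,PetscInt N,Mat *mpimat)
     {
       PetscErrorCode ierr;
       Mat            seqmat;

       PetscFunctionBegin;
       ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,1,NULL,&seqmat);CHKERRQ(ierr);
       ierr = MatSetValue(seqmat,0,0,1.0,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,mpimat);CHKERRQ(ierr);
       PetscFunctionReturn(0);
     }
*/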