#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL;
   the type also automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSizes_MPIAIJ"
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
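    /* the diagonal (A) block of this row contains no stored nonzero value; also scan the off-diagonal (B) block */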
bb = b->a + ib[i]; 111 for (j=0; j<nb; j++) { 112 if (bb[j] != 0.0) { 113 rows[cnt++] = rstart + i; 114 goto ok2; 115 } 116 } 117 ok2:; 118 } 119 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatDiagonalSet_MPIAIJ" 125 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 126 { 127 PetscErrorCode ierr; 128 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 129 130 PetscFunctionBegin; 131 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 132 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 133 } else { 134 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 135 } 136 PetscFunctionReturn(0); 137 } 138 139 140 #undef __FUNCT__ 141 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 142 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 143 { 144 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 145 PetscErrorCode ierr; 146 PetscInt i,rstart,nrows,*rows; 147 148 PetscFunctionBegin; 149 *zrows = NULL; 150 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 151 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 152 for (i=0; i<nrows; i++) rows[i] += rstart; 153 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 159 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 160 { 161 PetscErrorCode ierr; 162 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 163 PetscInt i,n,*garray = aij->garray; 164 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 165 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 166 PetscReal *work; 167 168 PetscFunctionBegin; 169 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 170 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 171 if (type == NORM_2) { 172 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 173 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 174 } 175 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 176 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 177 } 178 } else if (type == NORM_1) { 179 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 180 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 181 } 182 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 183 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 184 } 185 } else if (type == NORM_INFINITY) { 186 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 187 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 188 } 189 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 190 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 191 } 192 193 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 194 if (type == NORM_INFINITY) { 195 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 196 } else { 197 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 198 } 199 ierr = PetscFree(work);CHKERRQ(ierr); 200 if (type == NORM_2) { 201 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 202 } 203 PetscFunctionReturn(0); 204 } 205 206 #undef __FUNCT__ 207 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ" 208 PetscErrorCode 
MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 209 { 210 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 211 IS sis,gis; 212 PetscErrorCode ierr; 213 const PetscInt *isis,*igis; 214 PetscInt n,*iis,nsis,ngis,rstart,i; 215 216 PetscFunctionBegin; 217 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 218 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 219 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 220 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 221 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 222 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 223 224 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 225 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 226 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 227 n = ngis + nsis; 228 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 229 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 230 for (i=0; i<n; i++) iis[i] += rstart; 231 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 232 233 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 234 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 235 ierr = ISDestroy(&sis);CHKERRQ(ierr); 236 ierr = ISDestroy(&gis);CHKERRQ(ierr); 237 PetscFunctionReturn(0); 238 } 239 240 #undef __FUNCT__ 241 #define __FUNCT__ "MatDistribute_MPIAIJ" 242 /* 243 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 244 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 245 246 Only for square matrices 247 248 Used by a preconditioner, hence PETSC_EXTERN 249 */ 250 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 251 { 252 PetscMPIInt rank,size; 253 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 254 PetscErrorCode ierr; 255 Mat mat; 256 Mat_SeqAIJ *gmata; 257 PetscMPIInt tag; 258 MPI_Status status; 259 PetscBool aij; 260 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 261 262 PetscFunctionBegin; 263 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 264 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 265 if (!rank) { 266 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 267 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 268 } 269 if (reuse == MAT_INITIAL_MATRIX) { 270 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 271 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 272 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 273 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 274 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 275 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 276 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 277 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 278 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 279 280 rowners[0] = 0; 281 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 282 rstart = rowners[rank]; 283 rend = rowners[rank+1]; 284 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 285 if (!rank) { 286 gmata = (Mat_SeqAIJ*) gmat->data; 287 /* send row lengths to all processors */ 288 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 289 for (i=1; i<size; i++) { 290 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 291 } 292 /* determine number diagonal and 
off-diagonal counts */ 293 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 294 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 295 jj = 0; 296 for (i=0; i<m; i++) { 297 for (j=0; j<dlens[i]; j++) { 298 if (gmata->j[jj] < rstart) ld[i]++; 299 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 300 jj++; 301 } 302 } 303 /* send column indices to other processes */ 304 for (i=1; i<size; i++) { 305 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 306 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 307 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 308 } 309 310 /* send numerical values to other processes */ 311 for (i=1; i<size; i++) { 312 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 313 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 314 } 315 gmataa = gmata->a; 316 gmataj = gmata->j; 317 318 } else { 319 /* receive row lengths */ 320 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 321 /* receive column indices */ 322 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 323 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 324 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 325 /* determine number diagonal and off-diagonal counts */ 326 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 327 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 328 jj = 0; 329 for (i=0; i<m; i++) { 330 for (j=0; j<dlens[i]; j++) { 331 if (gmataj[jj] < rstart) ld[i]++; 332 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 333 jj++; 334 } 335 } 336 /* receive numerical values */ 337 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 338 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 339 } 340 /* set preallocation */ 341 for (i=0; i<m; i++) { 342 dlens[i] -= olens[i]; 343 } 344 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 345 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 346 347 for (i=0; i<m; i++) { 348 dlens[i] += olens[i]; 349 } 350 cnt = 0; 351 for (i=0; i<m; i++) { 352 row = rstart + i; 353 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 354 cnt += dlens[i]; 355 } 356 if (rank) { 357 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 358 } 359 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 360 ierr = PetscFree(rowners);CHKERRQ(ierr); 361 362 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 363 364 *inmat = mat; 365 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 366 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 367 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 368 mat = *inmat; 369 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 370 if (!rank) { 371 /* send numerical values to other processes */ 372 gmata = (Mat_SeqAIJ*) gmat->data; 373 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 374 gmataa = gmata->a; 375 for (i=1; i<size; i++) { 376 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 377 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 378 } 379 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 380 } else { 381 /* receive numerical values from process 0*/ 382 nz = Ad->nz + Ao->nz; 383 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 384 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 
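      /* gmataa now holds this rank's nonzero values for its local rows, row by row in global
         column order, which is what the split into diagonal (ad) and off-diagonal (ao) parts below assumes */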
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array, but it is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] =
value; \ 472 A->nonzerostate++;\ 473 a_noinsert: ; \ 474 ailen[row] = nrow1; \ 475 } 476 477 478 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 479 { \ 480 if (col <= lastcol2) low2 = 0; \ 481 else high2 = nrow2; \ 482 lastcol2 = col; \ 483 while (high2-low2 > 5) { \ 484 t = (low2+high2)/2; \ 485 if (rp2[t] > col) high2 = t; \ 486 else low2 = t; \ 487 } \ 488 for (_i=low2; _i<high2; _i++) { \ 489 if (rp2[_i] > col) break; \ 490 if (rp2[_i] == col) { \ 491 if (addv == ADD_VALUES) ap2[_i] += value; \ 492 else ap2[_i] = value; \ 493 goto b_noinsert; \ 494 } \ 495 } \ 496 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 497 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 498 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 499 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 500 N = nrow2++ - 1; b->nz++; high2++; \ 501 /* shift up all the later entries in this row */ \ 502 for (ii=N; ii>=_i; ii--) { \ 503 rp2[ii+1] = rp2[ii]; \ 504 ap2[ii+1] = ap2[ii]; \ 505 } \ 506 rp2[_i] = col; \ 507 ap2[_i] = value; \ 508 B->nonzerostate++; \ 509 b_noinsert: ; \ 510 bilen[row] = nrow2; \ 511 } 512 513 #undef __FUNCT__ 514 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 515 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 516 { 517 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 518 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 519 PetscErrorCode ierr; 520 PetscInt l,*garray = mat->garray,diag; 521 522 PetscFunctionBegin; 523 /* code only works for square matrices A */ 524 525 /* find size of row to the left of the diagonal part */ 526 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 527 row = row - diag; 528 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 529 if (garray[b->j[b->i[row]+l]] > diag) break; 530 } 531 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 532 533 /* diagonal part */ 534 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 535 536 /* right of diagonal part */ 537 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 538 PetscFunctionReturn(0); 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatSetValues_MPIAIJ" 543 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 544 { 545 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 546 PetscScalar value; 547 PetscErrorCode ierr; 548 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 549 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 550 PetscBool roworiented = aij->roworiented; 551 552 /* Some Variables required in the macro */ 553 Mat A = aij->A; 554 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 555 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 556 MatScalar *aa = a->a; 557 PetscBool ignorezeroentries = a->ignorezeroentries; 558 Mat B = aij->B; 559 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 560 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 561 MatScalar *ba = b->a; 562 563 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 564 PetscInt nonew; 565 MatScalar *ap1,*ap2; 566 567 PetscFunctionBegin; 568 for (i=0; 
i<m; i++) { 569 if (im[i] < 0) continue; 570 #if defined(PETSC_USE_DEBUG) 571 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 572 #endif 573 if (im[i] >= rstart && im[i] < rend) { 574 row = im[i] - rstart; 575 lastcol1 = -1; 576 rp1 = aj + ai[row]; 577 ap1 = aa + ai[row]; 578 rmax1 = aimax[row]; 579 nrow1 = ailen[row]; 580 low1 = 0; 581 high1 = nrow1; 582 lastcol2 = -1; 583 rp2 = bj + bi[row]; 584 ap2 = ba + bi[row]; 585 rmax2 = bimax[row]; 586 nrow2 = bilen[row]; 587 low2 = 0; 588 high2 = nrow2; 589 590 for (j=0; j<n; j++) { 591 if (roworiented) value = v[i*n+j]; 592 else value = v[i+j*m]; 593 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 594 if (in[j] >= cstart && in[j] < cend) { 595 col = in[j] - cstart; 596 nonew = a->nonew; 597 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 598 } else if (in[j] < 0) continue; 599 #if defined(PETSC_USE_DEBUG) 600 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 601 #endif 602 else { 603 if (mat->was_assembled) { 604 if (!aij->colmap) { 605 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 606 } 607 #if defined(PETSC_USE_CTABLE) 608 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 609 col--; 610 #else 611 col = aij->colmap[in[j]] - 1; 612 #endif 613 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 614 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 615 col = in[j]; 616 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 617 B = aij->B; 618 b = (Mat_SeqAIJ*)B->data; 619 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 620 rp2 = bj + bi[row]; 621 ap2 = ba + bi[row]; 622 rmax2 = bimax[row]; 623 nrow2 = bilen[row]; 624 low2 = 0; 625 high2 = nrow2; 626 bm = aij->B->rmap->n; 627 ba = b->a; 628 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 629 } else col = in[j]; 630 nonew = b->nonew; 631 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 632 } 633 } 634 } else { 635 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 636 if (!aij->donotstash) { 637 mat->assembled = PETSC_FALSE; 638 if (roworiented) { 639 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 640 } else { 641 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 642 } 643 } 644 } 645 } 646 PetscFunctionReturn(0); 647 } 648 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatGetValues_MPIAIJ" 651 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 652 { 653 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 654 PetscErrorCode ierr; 655 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 656 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 657 658 PetscFunctionBegin; 659 for (i=0; i<m; i++) { 660 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 661 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max 
%D",idxm[i],mat->rmap->N-1); 662 if (idxm[i] >= rstart && idxm[i] < rend) { 663 row = idxm[i] - rstart; 664 for (j=0; j<n; j++) { 665 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 666 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 667 if (idxn[j] >= cstart && idxn[j] < cend) { 668 col = idxn[j] - cstart; 669 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 670 } else { 671 if (!aij->colmap) { 672 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 673 } 674 #if defined(PETSC_USE_CTABLE) 675 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 676 col--; 677 #else 678 col = aij->colmap[idxn[j]] - 1; 679 #endif 680 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 681 else { 682 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 683 } 684 } 685 } 686 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 687 } 688 PetscFunctionReturn(0); 689 } 690 691 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 692 693 #undef __FUNCT__ 694 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 695 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 696 { 697 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 698 PetscErrorCode ierr; 699 PetscInt nstash,reallocs; 700 701 PetscFunctionBegin; 702 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 703 704 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 705 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 706 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNCT__ 711 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 712 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 713 { 714 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 715 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 716 PetscErrorCode ierr; 717 PetscMPIInt n; 718 PetscInt i,j,rstart,ncols,flg; 719 PetscInt *row,*col; 720 PetscBool other_disassembled; 721 PetscScalar *val; 722 723 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 724 725 PetscFunctionBegin; 726 if (!aij->donotstash && !mat->nooffprocentries) { 727 while (1) { 728 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 729 if (!flg) break; 730 731 for (i=0; i<n; ) { 732 /* Now identify the consecutive vals belonging to the same row */ 733 for (j=i,rstart=row[j]; j<n; j++) { 734 if (row[j] != rstart) break; 735 } 736 if (j < n) ncols = j-i; 737 else ncols = n-i; 738 /* Now assemble all these values with a single function call */ 739 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 740 741 i = j; 742 } 743 } 744 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 745 } 746 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 747 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 748 749 /* determine if any processor has disassembled, if so we must 750 also disassemble ourselfs, in order that we may reassemble. 
*/ 751 /* 752 if nonzero structure of submatrix B cannot change then we know that 753 no processor disassembled thus we can skip this stuff 754 */ 755 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 756 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 757 if (mat->was_assembled && !other_disassembled) { 758 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 759 } 760 } 761 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 762 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 763 } 764 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 765 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 766 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 767 768 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 769 770 aij->rowvalues = 0; 771 772 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 773 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 774 775 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 776 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 777 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 778 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 779 } 780 PetscFunctionReturn(0); 781 } 782 783 #undef __FUNCT__ 784 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 785 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 786 { 787 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 788 PetscErrorCode ierr; 789 790 PetscFunctionBegin; 791 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 792 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatZeroRows_MPIAIJ" 798 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 799 { 800 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 801 PetscInt *lrows; 802 PetscInt r, len; 803 PetscErrorCode ierr; 804 805 PetscFunctionBegin; 806 /* get locally owned rows */ 807 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 808 /* fix right hand side if needed */ 809 if (x && b) { 810 const PetscScalar *xx; 811 PetscScalar *bb; 812 813 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 814 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 815 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 816 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 817 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 818 } 819 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 820 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 821 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 822 PetscBool cong; 823 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 824 if (cong) A->congruentlayouts = 1; 825 else A->congruentlayouts = 0; 826 } 827 if ((diag != 0.0) && A->congruentlayouts) { 828 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 829 } else if (diag != 0.0) { 830 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 831 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 832 for (r = 0; r < len; ++r) { 833 const 
PetscInt row = lrows[r] + A->rmap->rstart; 834 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 835 } 836 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 837 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 838 } else { 839 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 840 } 841 ierr = PetscFree(lrows);CHKERRQ(ierr); 842 843 /* only change matrix nonzero state if pattern was allowed to be changed */ 844 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 845 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 846 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 847 } 848 PetscFunctionReturn(0); 849 } 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 853 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 854 { 855 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 856 PetscErrorCode ierr; 857 PetscMPIInt n = A->rmap->n; 858 PetscInt i,j,r,m,p = 0,len = 0; 859 PetscInt *lrows,*owners = A->rmap->range; 860 PetscSFNode *rrows; 861 PetscSF sf; 862 const PetscScalar *xx; 863 PetscScalar *bb,*mask; 864 Vec xmask,lmask; 865 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 866 const PetscInt *aj, *ii,*ridx; 867 PetscScalar *aa; 868 869 PetscFunctionBegin; 870 /* Create SF where leaves are input rows and roots are owned rows */ 871 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 872 for (r = 0; r < n; ++r) lrows[r] = -1; 873 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 874 for (r = 0; r < N; ++r) { 875 const PetscInt idx = rows[r]; 876 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 877 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 878 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 879 } 880 rrows[r].rank = p; 881 rrows[r].index = rows[r] - owners[p]; 882 } 883 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 884 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 885 /* Collect flags for rows to be zeroed */ 886 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 887 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 888 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 889 /* Compress and put in row numbers */ 890 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 891 /* zero diagonal part of matrix */ 892 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 893 /* handle off diagonal part of matrix */ 894 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 895 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 896 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 897 for (i=0; i<len; i++) bb[lrows[i]] = 1; 898 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 899 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 900 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 901 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 902 if (x) { 903 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 904 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 906 ierr = 
VecGetArray(b,&bb);CHKERRQ(ierr); 907 } 908 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 909 /* remove zeroed rows of off diagonal matrix */ 910 ii = aij->i; 911 for (i=0; i<len; i++) { 912 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 913 } 914 /* loop over all elements of off process part of matrix zeroing removed columns*/ 915 if (aij->compressedrow.use) { 916 m = aij->compressedrow.nrows; 917 ii = aij->compressedrow.i; 918 ridx = aij->compressedrow.rindex; 919 for (i=0; i<m; i++) { 920 n = ii[i+1] - ii[i]; 921 aj = aij->j + ii[i]; 922 aa = aij->a + ii[i]; 923 924 for (j=0; j<n; j++) { 925 if (PetscAbsScalar(mask[*aj])) { 926 if (b) bb[*ridx] -= *aa*xx[*aj]; 927 *aa = 0.0; 928 } 929 aa++; 930 aj++; 931 } 932 ridx++; 933 } 934 } else { /* do not use compressed row format */ 935 m = l->B->rmap->n; 936 for (i=0; i<m; i++) { 937 n = ii[i+1] - ii[i]; 938 aj = aij->j + ii[i]; 939 aa = aij->a + ii[i]; 940 for (j=0; j<n; j++) { 941 if (PetscAbsScalar(mask[*aj])) { 942 if (b) bb[i] -= *aa*xx[*aj]; 943 *aa = 0.0; 944 } 945 aa++; 946 aj++; 947 } 948 } 949 } 950 if (x) { 951 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 952 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 953 } 954 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 955 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 956 ierr = PetscFree(lrows);CHKERRQ(ierr); 957 958 /* only change matrix nonzero state if pattern was allowed to be changed */ 959 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 960 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 961 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 962 } 963 PetscFunctionReturn(0); 964 } 965 966 #undef __FUNCT__ 967 #define __FUNCT__ "MatMult_MPIAIJ" 968 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 969 { 970 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 971 PetscErrorCode ierr; 972 PetscInt nt; 973 974 PetscFunctionBegin; 975 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 976 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 977 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 978 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 979 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 980 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 986 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 987 { 988 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 989 PetscErrorCode ierr; 990 991 PetscFunctionBegin; 992 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatMultAdd_MPIAIJ" 998 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 999 { 1000 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1001 PetscErrorCode ierr; 1002 1003 PetscFunctionBegin; 1004 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 #undef __FUNCT__ 1012 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1013 
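/*
   Computes y = A^T x for MPIAIJ: the diagonal block contributes A_d^T x directly to the
   locally owned part of y, while B^T x (stored in lvec) holds contributions destined for
   rows of y owned by other ranks and is accumulated there by a reverse scatter-add.
*/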
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1014 { 1015 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1016 PetscErrorCode ierr; 1017 PetscBool merged; 1018 1019 PetscFunctionBegin; 1020 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1021 /* do nondiagonal part */ 1022 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1023 if (!merged) { 1024 /* send it on its way */ 1025 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1026 /* do local part */ 1027 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1028 /* receive remote parts: note this assumes the values are not actually */ 1029 /* added in yy until the next line, */ 1030 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1031 } else { 1032 /* do local part */ 1033 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1034 /* send it on its way */ 1035 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1036 /* values actually were received in the Begin() but we need to call this nop */ 1037 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1038 } 1039 PetscFunctionReturn(0); 1040 } 1041 1042 #undef __FUNCT__ 1043 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1044 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1045 { 1046 MPI_Comm comm; 1047 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1048 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1049 IS Me,Notme; 1050 PetscErrorCode ierr; 1051 PetscInt M,N,first,last,*notme,i; 1052 PetscMPIInt size; 1053 1054 PetscFunctionBegin; 1055 /* Easy test: symmetric diagonal block */ 1056 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1057 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1058 if (!*f) PetscFunctionReturn(0); 1059 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1060 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1061 if (size == 1) PetscFunctionReturn(0); 1062 1063 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
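     Each process extracts A(owned rows, unowned columns) and B(unowned rows, owned columns)
     as sequential matrices and checks that these two blocks are transposes of each other.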
*/ 1064 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1065 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1066 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1067 for (i=0; i<first; i++) notme[i] = i; 1068 for (i=last; i<M; i++) notme[i-last+first] = i; 1069 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1070 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1071 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1072 Aoff = Aoffs[0]; 1073 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1074 Boff = Boffs[0]; 1075 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1076 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1077 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1078 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1079 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1080 ierr = PetscFree(notme);CHKERRQ(ierr); 1081 PetscFunctionReturn(0); 1082 } 1083 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1086 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1087 { 1088 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1089 PetscErrorCode ierr; 1090 1091 PetscFunctionBegin; 1092 /* do nondiagonal part */ 1093 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1094 /* send it on its way */ 1095 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1096 /* do local part */ 1097 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1098 /* receive remote parts */ 1099 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 /* 1104 This only works correctly for square matrices where the subblock A->A is the 1105 diagonal block 1106 */ 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1109 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1110 { 1111 PetscErrorCode ierr; 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 1114 PetscFunctionBegin; 1115 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1116 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1117 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatScale_MPIAIJ" 1123 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1124 { 1125 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1126 PetscErrorCode ierr; 1127 1128 PetscFunctionBegin; 1129 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1130 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatDestroy_MPIAIJ" 1136 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1137 { 1138 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1139 PetscErrorCode ierr; 1140 1141 PetscFunctionBegin; 1142 #if defined(PETSC_USE_LOG) 1143 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1144 #endif 1145 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1146 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1147 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1148 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1149 #if defined(PETSC_USE_CTABLE) 1150 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1151 #else 1152 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1153 #endif 1154 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1155 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1156 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1157 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1158 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1159 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1160 1161 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1162 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1163 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1164 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1165 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1168 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1169 #if defined(PETSC_HAVE_ELEMENTAL) 1170 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1171 #endif 1172 #if defined(PETSC_HAVE_HYPRE) 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1174 #endif 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1180 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1181 { 1182 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1183 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1184 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1185 PetscErrorCode ierr; 1186 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1187 int fd; 1188 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1189 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1190 PetscScalar *column_values; 1191 PetscInt message_count,flowcontrolcount; 1192 FILE *file; 1193 1194 PetscFunctionBegin; 1195 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1196 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1197 nz = A->nz + B->nz; 1198 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1199 if (!rank) { 1200 header[0] = MAT_FILE_CLASSID; 1201 header[1] = mat->rmap->N; 1202 header[2] = mat->cmap->N; 1203 1204 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1205 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1206 /* get largest number of rows any processor has */ 1207 rlen = mat->rmap->n; 1208 range = mat->rmap->range; 1209 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1210 } else { 1211 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1212 rlen = mat->rmap->n; 1213 } 1214 1215 /* load up the local row counts */ 1216 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1217 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1218 1219 /* store the row lengths to the file */ 1220 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1221 if (!rank) { 1222 ierr = 
PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* this processor needs space as large as the largest amount any processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz =
%D",cnt,A->nz+B->nz); 1285 1286 /* store the column values to the file */ 1287 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1288 if (!rank) { 1289 MPI_Status status; 1290 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1291 for (i=1; i<size; i++) { 1292 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1293 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1294 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1295 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1296 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1297 } 1298 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1299 } else { 1300 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1301 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1302 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1303 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1304 } 1305 ierr = PetscFree(column_values);CHKERRQ(ierr); 1306 1307 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1308 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1309 PetscFunctionReturn(0); 1310 } 1311 1312 #include <petscdraw.h> 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1315 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1316 { 1317 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1318 PetscErrorCode ierr; 1319 PetscMPIInt rank = aij->rank,size = aij->size; 1320 PetscBool isdraw,iascii,isbinary; 1321 PetscViewer sviewer; 1322 PetscViewerFormat format; 1323 1324 PetscFunctionBegin; 1325 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1326 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1327 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1328 if (iascii) { 1329 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1330 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1331 MatInfo info; 1332 PetscBool inodes; 1333 1334 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1335 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1336 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1337 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1338 if (!inodes) { 1339 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1340 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1341 } else { 1342 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1343 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1344 } 1345 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1346 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1347 ierr = 
MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1348 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1349 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1350 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1351 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1352 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1353 PetscFunctionReturn(0); 1354 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1355 PetscInt inodecount,inodelimit,*inodes; 1356 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1357 if (inodes) { 1358 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1359 } else { 1360 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1361 } 1362 PetscFunctionReturn(0); 1363 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1364 PetscFunctionReturn(0); 1365 } 1366 } else if (isbinary) { 1367 if (size == 1) { 1368 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1369 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1370 } else { 1371 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1372 } 1373 PetscFunctionReturn(0); 1374 } else if (isdraw) { 1375 PetscDraw draw; 1376 PetscBool isnull; 1377 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1378 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1379 if (isnull) PetscFunctionReturn(0); 1380 } 1381 1382 { 1383 /* assemble the entire matrix onto first processor. */ 1384 Mat A; 1385 Mat_SeqAIJ *Aloc; 1386 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1387 MatScalar *a; 1388 1389 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1390 if (!rank) { 1391 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1392 } else { 1393 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1394 } 1395 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1396 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1397 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1398 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1399 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1400 1401 /* copy over the A part */ 1402 Aloc = (Mat_SeqAIJ*)aij->A->data; 1403 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1404 row = mat->rmap->rstart; 1405 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1406 for (i=0; i<m; i++) { 1407 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1408 row++; 1409 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1410 } 1411 aj = Aloc->j; 1412 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1413 1414 /* copy over the B part */ 1415 Aloc = (Mat_SeqAIJ*)aij->B->data; 1416 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1417 row = mat->rmap->rstart; 1418 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1419 ct = cols; 1420 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1421 for (i=0; i<m; i++) { 1422 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1423 row++; 1424 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1425 } 1426 ierr = PetscFree(ct);CHKERRQ(ierr); 1427 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1428 ierr = 
MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1429 /* 1430 Everyone has to call to draw the matrix since the graphics waits are 1431 synchronized across all processors that share the PetscDraw object 1432 */ 1433 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1434 if (!rank) { 1435 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1436 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1437 } 1438 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1439 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1440 ierr = MatDestroy(&A);CHKERRQ(ierr); 1441 } 1442 PetscFunctionReturn(0); 1443 } 1444 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatView_MPIAIJ" 1447 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1448 { 1449 PetscErrorCode ierr; 1450 PetscBool iascii,isdraw,issocket,isbinary; 1451 1452 PetscFunctionBegin; 1453 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1457 if (iascii || isdraw || isbinary || issocket) { 1458 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1459 } 1460 PetscFunctionReturn(0); 1461 } 1462 1463 #undef __FUNCT__ 1464 #define __FUNCT__ "MatSOR_MPIAIJ" 1465 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1466 { 1467 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1468 PetscErrorCode ierr; 1469 Vec bb1 = 0; 1470 PetscBool hasop; 1471 1472 PetscFunctionBegin; 1473 if (flag == SOR_APPLY_UPPER) { 1474 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1475 PetscFunctionReturn(0); 1476 } 1477 1478 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1479 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1480 } 1481 1482 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1483 if (flag & SOR_ZERO_INITIAL_GUESS) { 1484 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1485 its--; 1486 } 1487 1488 while (its--) { 1489 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1490 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1491 1492 /* update rhs: bb1 = bb - B*x */ 1493 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1494 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1495 1496 /* local sweep */ 1497 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1498 } 1499 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1500 if (flag & SOR_ZERO_INITIAL_GUESS) { 1501 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1502 its--; 1503 } 1504 while (its--) { 1505 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1506 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1507 1508 /* update rhs: bb1 = bb - B*x */ 1509 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1510 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1511 1512 /* local sweep 
*/ 1513 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1514 } 1515 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1516 if (flag & SOR_ZERO_INITIAL_GUESS) { 1517 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1518 its--; 1519 } 1520 while (its--) { 1521 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1522 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1523 1524 /* update rhs: bb1 = bb - B*x */ 1525 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1526 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1527 1528 /* local sweep */ 1529 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1530 } 1531 } else if (flag & SOR_EISENSTAT) { 1532 Vec xx1; 1533 1534 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1535 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1536 1537 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1538 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1539 if (!mat->diag) { 1540 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1541 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1542 } 1543 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1544 if (hasop) { 1545 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1546 } else { 1547 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1548 } 1549 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1550 1551 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1552 1553 /* local sweep */ 1554 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1555 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1556 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1557 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1558 1559 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1560 1561 matin->factorerrortype = mat->A->factorerrortype; 1562 PetscFunctionReturn(0); 1563 } 1564 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatPermute_MPIAIJ" 1567 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1568 { 1569 Mat aA,aB,Aperm; 1570 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1571 PetscScalar *aa,*ba; 1572 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1573 PetscSF rowsf,sf; 1574 IS parcolp = NULL; 1575 PetscBool done; 1576 PetscErrorCode ierr; 1577 1578 PetscFunctionBegin; 1579 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1580 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1581 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1582 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1583 1584 /* Invert row permutation to find out where my rows should go */ 1585 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1586 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1587 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1588 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1589 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1590 ierr = 
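/*
   Usage sketch for the SOR kernel above (a minimal example; this routine is normally reached
   through PCSOR rather than called directly). In parallel only the processor-local sweeps are
   supported, e.g. one local symmetric sweep with a zero initial guess, omega = 1.0, fshift = 0.0,
   where A, b and x are an assembled MATMPIAIJ matrix and conforming vectors:

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/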
PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1591 1592 /* Invert column permutation to find out where my columns should go */ 1593 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1594 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1595 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1596 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1597 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1598 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1599 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1600 1601 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1602 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1603 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1604 1605 /* Find out where my gcols should go */ 1606 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1607 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1608 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1609 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1610 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1611 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1612 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1613 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1614 1615 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1616 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1617 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1618 for (i=0; i<m; i++) { 1619 PetscInt row = rdest[i],rowner; 1620 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1621 for (j=ai[i]; j<ai[i+1]; j++) { 1622 PetscInt cowner,col = cdest[aj[j]]; 1623 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1624 if (rowner == cowner) dnnz[i]++; 1625 else onnz[i]++; 1626 } 1627 for (j=bi[i]; j<bi[i+1]; j++) { 1628 PetscInt cowner,col = gcdest[bj[j]]; 1629 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1630 if (rowner == cowner) dnnz[i]++; 1631 else onnz[i]++; 1632 } 1633 } 1634 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1635 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1636 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1637 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1638 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1639 1640 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1641 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1642 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1643 for (i=0; i<m; i++) { 1644 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1645 PetscInt j0,rowlen; 1646 rowlen = ai[i+1] - ai[i]; 1647 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1648 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1649 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1650 } 1651 rowlen = bi[i+1] - bi[i]; 1652 for (j0=j=0; j<rowlen; j0=j) { 1653 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1654 ierr = 
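/*
   The permutation handling above uses a generic PetscSF pattern: each process places its own
   global indices at the leaves and reduces them onto the roots selected by the target indices,
   so after the reduce each owned root position holds the global index of the leaf attached to
   it. A stand-alone sketch of the same idea (comm, layout and want are hypothetical names;
   want[i] is the global root index that leaf i is attached to):

     PetscSF  invsf;
     PetscInt i,*src,*dest;
     ierr = PetscMalloc2(layout->n,&src,layout->n,&dest);CHKERRQ(ierr);
     ierr = PetscSFCreate(comm,&invsf);CHKERRQ(ierr);
     ierr = PetscSFSetGraphLayout(invsf,layout,layout->n,NULL,PETSC_COPY_VALUES,want);CHKERRQ(ierr);
     for (i=0; i<layout->n; i++) src[i] = layout->rstart + i;
     ierr = PetscSFReduceBegin(invsf,MPIU_INT,src,dest,MPIU_REPLACE);CHKERRQ(ierr);
     ierr = PetscSFReduceEnd(invsf,MPIU_INT,src,dest,MPIU_REPLACE);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&invsf);CHKERRQ(ierr);
     ierr = PetscFree2(src,dest);CHKERRQ(ierr);
*/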
MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1655 } 1656 } 1657 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1658 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1659 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1660 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1661 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1662 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1663 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1664 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1665 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1666 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1667 *B = Aperm; 1668 PetscFunctionReturn(0); 1669 } 1670 1671 #undef __FUNCT__ 1672 #define __FUNCT__ "MatGetGhosts_MPIAIJ" 1673 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1674 { 1675 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1676 PetscErrorCode ierr; 1677 1678 PetscFunctionBegin; 1679 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1680 if (ghosts) *ghosts = aij->garray; 1681 PetscFunctionReturn(0); 1682 } 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1686 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1687 { 1688 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1689 Mat A = mat->A,B = mat->B; 1690 PetscErrorCode ierr; 1691 PetscReal isend[5],irecv[5]; 1692 1693 PetscFunctionBegin; 1694 info->block_size = 1.0; 1695 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1696 1697 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1698 isend[3] = info->memory; isend[4] = info->mallocs; 1699 1700 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1701 1702 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1703 isend[3] += info->memory; isend[4] += info->mallocs; 1704 if (flag == MAT_LOCAL) { 1705 info->nz_used = isend[0]; 1706 info->nz_allocated = isend[1]; 1707 info->nz_unneeded = isend[2]; 1708 info->memory = isend[3]; 1709 info->mallocs = isend[4]; 1710 } else if (flag == MAT_GLOBAL_MAX) { 1711 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1712 1713 info->nz_used = irecv[0]; 1714 info->nz_allocated = irecv[1]; 1715 info->nz_unneeded = irecv[2]; 1716 info->memory = irecv[3]; 1717 info->mallocs = irecv[4]; 1718 } else if (flag == MAT_GLOBAL_SUM) { 1719 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1720 1721 info->nz_used = irecv[0]; 1722 info->nz_allocated = irecv[1]; 1723 info->nz_unneeded = irecv[2]; 1724 info->memory = irecv[3]; 1725 info->mallocs = irecv[4]; 1726 } 1727 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1728 info->fill_ratio_needed = 0; 1729 info->factor_mallocs = 0; 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatSetOption_MPIAIJ" 1735 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1736 { 1737 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1738 PetscErrorCode ierr; 1739 1740 PetscFunctionBegin; 1741 switch (op) { 1742 case MAT_NEW_NONZERO_LOCATIONS: 1743 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1744 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1745 case MAT_KEEP_NONZERO_PATTERN: 1746 case MAT_NEW_NONZERO_LOCATION_ERR: 1747 case MAT_USE_INODES: 1748 case MAT_IGNORE_ZERO_ENTRIES: 1749 
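/*
   Usage sketch for MatPermute() from the caller's side (a minimal example, assuming rowperm and
   colperm are index sets describing the desired global ordering, each process supplying its
   portion):

     Mat Aperm;
     ierr = MatPermute(A,rowperm,colperm,&Aperm);CHKERRQ(ierr);
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/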
MatCheckPreallocated(A,1); 1750 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1751 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1752 break; 1753 case MAT_ROW_ORIENTED: 1754 MatCheckPreallocated(A,1); 1755 a->roworiented = flg; 1756 1757 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1758 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1759 break; 1760 case MAT_NEW_DIAGONALS: 1761 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1762 break; 1763 case MAT_IGNORE_OFF_PROC_ENTRIES: 1764 a->donotstash = flg; 1765 break; 1766 case MAT_SPD: 1767 A->spd_set = PETSC_TRUE; 1768 A->spd = flg; 1769 if (flg) { 1770 A->symmetric = PETSC_TRUE; 1771 A->structurally_symmetric = PETSC_TRUE; 1772 A->symmetric_set = PETSC_TRUE; 1773 A->structurally_symmetric_set = PETSC_TRUE; 1774 } 1775 break; 1776 case MAT_SYMMETRIC: 1777 MatCheckPreallocated(A,1); 1778 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1779 break; 1780 case MAT_STRUCTURALLY_SYMMETRIC: 1781 MatCheckPreallocated(A,1); 1782 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1783 break; 1784 case MAT_HERMITIAN: 1785 MatCheckPreallocated(A,1); 1786 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1787 break; 1788 case MAT_SYMMETRY_ETERNAL: 1789 MatCheckPreallocated(A,1); 1790 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1791 break; 1792 case MAT_SUBMAT_SINGLEIS: 1793 A->submat_singleis = flg; 1794 break; 1795 default: 1796 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1797 } 1798 PetscFunctionReturn(0); 1799 } 1800 1801 #undef __FUNCT__ 1802 #define __FUNCT__ "MatGetRow_MPIAIJ" 1803 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1804 { 1805 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1806 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1807 PetscErrorCode ierr; 1808 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1809 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1810 PetscInt *cmap,*idx_p; 1811 1812 PetscFunctionBegin; 1813 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1814 mat->getrowactive = PETSC_TRUE; 1815 1816 if (!mat->rowvalues && (idx || v)) { 1817 /* 1818 allocate enough space to hold information from the longest row. 
1819 */ 1820 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1821 PetscInt max = 1,tmp; 1822 for (i=0; i<matin->rmap->n; i++) { 1823 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1824 if (max < tmp) max = tmp; 1825 } 1826 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1827 } 1828 1829 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1830 lrow = row - rstart; 1831 1832 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1833 if (!v) {pvA = 0; pvB = 0;} 1834 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1835 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1836 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1837 nztot = nzA + nzB; 1838 1839 cmap = mat->garray; 1840 if (v || idx) { 1841 if (nztot) { 1842 /* Sort by increasing column numbers, assuming A and B already sorted */ 1843 PetscInt imark = -1; 1844 if (v) { 1845 *v = v_p = mat->rowvalues; 1846 for (i=0; i<nzB; i++) { 1847 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1848 else break; 1849 } 1850 imark = i; 1851 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1852 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1853 } 1854 if (idx) { 1855 *idx = idx_p = mat->rowindices; 1856 if (imark > -1) { 1857 for (i=0; i<imark; i++) { 1858 idx_p[i] = cmap[cworkB[i]]; 1859 } 1860 } else { 1861 for (i=0; i<nzB; i++) { 1862 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1863 else break; 1864 } 1865 imark = i; 1866 } 1867 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1868 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1869 } 1870 } else { 1871 if (idx) *idx = 0; 1872 if (v) *v = 0; 1873 } 1874 } 1875 *nz = nztot; 1876 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1877 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1878 PetscFunctionReturn(0); 1879 } 1880 1881 #undef __FUNCT__ 1882 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1883 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1884 { 1885 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1886 1887 PetscFunctionBegin; 1888 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1889 aij->getrowactive = PETSC_FALSE; 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatNorm_MPIAIJ" 1895 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1896 { 1897 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1898 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1899 PetscErrorCode ierr; 1900 PetscInt i,j,cstart = mat->cmap->rstart; 1901 PetscReal sum = 0.0; 1902 MatScalar *v; 1903 1904 PetscFunctionBegin; 1905 if (aij->size == 1) { 1906 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1907 } else { 1908 if (type == NORM_FROBENIUS) { 1909 v = amat->a; 1910 for (i=0; i<amat->nz; i++) { 1911 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1912 } 1913 v = bmat->a; 1914 for (i=0; i<bmat->nz; i++) { 1915 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1916 } 1917 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1918 *norm = PetscSqrtReal(*norm); 1919 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1920 } else if (type == NORM_1) { /* max column norm */ 1921 PetscReal *tmp,*tmp2; 1922 PetscInt *jj,*garray = aij->garray; 1923 ierr = 
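/*
   Usage sketch for row access from the caller's side (a minimal example). A process may only
   query rows it owns, only one row may be active at a time, and each MatGetRow() must be paired
   with MatRestoreRow():

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/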
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1924 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1925 *norm = 0.0; 1926 v = amat->a; jj = amat->j; 1927 for (j=0; j<amat->nz; j++) { 1928 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1929 } 1930 v = bmat->a; jj = bmat->j; 1931 for (j=0; j<bmat->nz; j++) { 1932 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1933 } 1934 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1935 for (j=0; j<mat->cmap->N; j++) { 1936 if (tmp2[j] > *norm) *norm = tmp2[j]; 1937 } 1938 ierr = PetscFree(tmp);CHKERRQ(ierr); 1939 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1940 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1941 } else if (type == NORM_INFINITY) { /* max row norm */ 1942 PetscReal ntemp = 0.0; 1943 for (j=0; j<aij->A->rmap->n; j++) { 1944 v = amat->a + amat->i[j]; 1945 sum = 0.0; 1946 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1947 sum += PetscAbsScalar(*v); v++; 1948 } 1949 v = bmat->a + bmat->i[j]; 1950 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1951 sum += PetscAbsScalar(*v); v++; 1952 } 1953 if (sum > ntemp) ntemp = sum; 1954 } 1955 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1956 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1957 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1958 } 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatTranspose_MPIAIJ" 1964 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1965 { 1966 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1967 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1968 PetscErrorCode ierr; 1969 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1970 PetscInt cstart = A->cmap->rstart,ncol; 1971 Mat B; 1972 MatScalar *array; 1973 1974 PetscFunctionBegin; 1975 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1976 1977 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1978 ai = Aloc->i; aj = Aloc->j; 1979 bi = Bloc->i; bj = Bloc->j; 1980 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1981 PetscInt *d_nnz,*g_nnz,*o_nnz; 1982 PetscSFNode *oloc; 1983 PETSC_UNUSED PetscSF sf; 1984 1985 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1986 /* compute d_nnz for preallocation */ 1987 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1988 for (i=0; i<ai[ma]; i++) { 1989 d_nnz[aj[i]]++; 1990 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1991 } 1992 /* compute local off-diagonal contributions */ 1993 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1994 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1995 /* map those to global */ 1996 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1997 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1998 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1999 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2000 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2001 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2002 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2003 2004 ierr = 
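/*
   Usage sketch for the norms implemented above (a minimal example; NORM_2 is not supported for
   this matrix type, as the error branch below indicates):

     PetscReal nrm1,nrmf,nrminf;
     ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrminf);CHKERRQ(ierr);
*/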
MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2005 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2006 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2007 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2008 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2009 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2010 } else { 2011 B = *matout; 2012 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2013 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2014 } 2015 2016 /* copy over the A part */ 2017 array = Aloc->a; 2018 row = A->rmap->rstart; 2019 for (i=0; i<ma; i++) { 2020 ncol = ai[i+1]-ai[i]; 2021 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2022 row++; 2023 array += ncol; aj += ncol; 2024 } 2025 aj = Aloc->j; 2026 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2027 2028 /* copy over the B part */ 2029 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2030 array = Bloc->a; 2031 row = A->rmap->rstart; 2032 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2033 cols_tmp = cols; 2034 for (i=0; i<mb; i++) { 2035 ncol = bi[i+1]-bi[i]; 2036 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2037 row++; 2038 array += ncol; cols_tmp += ncol; 2039 } 2040 ierr = PetscFree(cols);CHKERRQ(ierr); 2041 2042 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2043 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2044 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2045 *matout = B; 2046 } else { 2047 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2048 } 2049 PetscFunctionReturn(0); 2050 } 2051 2052 #undef __FUNCT__ 2053 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2054 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2055 { 2056 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2057 Mat a = aij->A,b = aij->B; 2058 PetscErrorCode ierr; 2059 PetscInt s1,s2,s3; 2060 2061 PetscFunctionBegin; 2062 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2063 if (rr) { 2064 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2065 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2066 /* Overlap communication with computation. 
*/ 2067 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2068 } 2069 if (ll) { 2070 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2071 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2072 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2073 } 2074 /* scale the diagonal block */ 2075 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2076 2077 if (rr) { 2078 /* Do a scatter end and then right scale the off-diagonal block */ 2079 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2080 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2081 } 2082 PetscFunctionReturn(0); 2083 } 2084 2085 #undef __FUNCT__ 2086 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2087 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2088 { 2089 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2090 PetscErrorCode ierr; 2091 2092 PetscFunctionBegin; 2093 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2094 PetscFunctionReturn(0); 2095 } 2096 2097 #undef __FUNCT__ 2098 #define __FUNCT__ "MatEqual_MPIAIJ" 2099 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2100 { 2101 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2102 Mat a,b,c,d; 2103 PetscBool flg; 2104 PetscErrorCode ierr; 2105 2106 PetscFunctionBegin; 2107 a = matA->A; b = matA->B; 2108 c = matB->A; d = matB->B; 2109 2110 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2111 if (flg) { 2112 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2113 } 2114 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2115 PetscFunctionReturn(0); 2116 } 2117 2118 #undef __FUNCT__ 2119 #define __FUNCT__ "MatCopy_MPIAIJ" 2120 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2121 { 2122 PetscErrorCode ierr; 2123 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2124 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2125 2126 PetscFunctionBegin; 2127 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2128 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2129 /* because of the column compression in the off-processor part of the matrix a->B, 2130 the number of columns in a->B and b->B may be different, hence we cannot call 2131 the MatCopy() directly on the two parts. If need be, we can provide a more 2132 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2133 then copying the submatrices */ 2134 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2135 } else { 2136 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2137 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2138 } 2139 PetscFunctionReturn(0); 2140 } 2141 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "MatSetUp_MPIAIJ" 2144 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2145 { 2146 PetscErrorCode ierr; 2147 2148 PetscFunctionBegin; 2149 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2150 PetscFunctionReturn(0); 2151 } 2152 2153 /* 2154 Computes the number of nonzeros per row needed for preallocation when X and Y 2155 have different nonzero structure. 
2156 */ 2157 #undef __FUNCT__ 2158 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2159 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2160 { 2161 PetscInt i,j,k,nzx,nzy; 2162 2163 PetscFunctionBegin; 2164 /* Set the number of nonzeros in the new matrix */ 2165 for (i=0; i<m; i++) { 2166 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2167 nzx = xi[i+1] - xi[i]; 2168 nzy = yi[i+1] - yi[i]; 2169 nnz[i] = 0; 2170 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2171 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2172 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2173 nnz[i]++; 2174 } 2175 for (; k<nzy; k++) nnz[i]++; 2176 } 2177 PetscFunctionReturn(0); 2178 } 2179 2180 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2181 #undef __FUNCT__ 2182 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2183 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2184 { 2185 PetscErrorCode ierr; 2186 PetscInt m = Y->rmap->N; 2187 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2188 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2189 2190 PetscFunctionBegin; 2191 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2192 PetscFunctionReturn(0); 2193 } 2194 2195 #undef __FUNCT__ 2196 #define __FUNCT__ "MatAXPY_MPIAIJ" 2197 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2198 { 2199 PetscErrorCode ierr; 2200 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2201 PetscBLASInt bnz,one=1; 2202 Mat_SeqAIJ *x,*y; 2203 2204 PetscFunctionBegin; 2205 if (str == SAME_NONZERO_PATTERN) { 2206 PetscScalar alpha = a; 2207 x = (Mat_SeqAIJ*)xx->A->data; 2208 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2209 y = (Mat_SeqAIJ*)yy->A->data; 2210 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2211 x = (Mat_SeqAIJ*)xx->B->data; 2212 y = (Mat_SeqAIJ*)yy->B->data; 2213 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2214 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2215 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2216 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2217 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2218 } else { 2219 Mat B; 2220 PetscInt *nnz_d,*nnz_o; 2221 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2222 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2223 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2224 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2225 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2226 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2227 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2228 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2229 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2230 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2231 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2232 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2233 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2234 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2235 } 2236 
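/*
   Usage sketch for MatAXPY(), which computes Y = Y + a*X (a minimal example). The MatStructure
   argument selects the path below: SAME_NONZERO_PATTERN uses the direct BLAS axpy on the stored
   values, SUBSET_NONZERO_PATTERN goes through MatAXPY_Basic(), and DIFFERENT_NONZERO_PATTERN
   builds a freshly preallocated matrix:

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/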
PetscFunctionReturn(0); 2237 } 2238 2239 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2240 2241 #undef __FUNCT__ 2242 #define __FUNCT__ "MatConjugate_MPIAIJ" 2243 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2244 { 2245 #if defined(PETSC_USE_COMPLEX) 2246 PetscErrorCode ierr; 2247 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2248 2249 PetscFunctionBegin; 2250 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2251 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2252 #else 2253 PetscFunctionBegin; 2254 #endif 2255 PetscFunctionReturn(0); 2256 } 2257 2258 #undef __FUNCT__ 2259 #define __FUNCT__ "MatRealPart_MPIAIJ" 2260 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2261 { 2262 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2263 PetscErrorCode ierr; 2264 2265 PetscFunctionBegin; 2266 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2267 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2268 PetscFunctionReturn(0); 2269 } 2270 2271 #undef __FUNCT__ 2272 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2273 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2274 { 2275 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2276 PetscErrorCode ierr; 2277 2278 PetscFunctionBegin; 2279 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2280 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2281 PetscFunctionReturn(0); 2282 } 2283 2284 #undef __FUNCT__ 2285 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2286 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2287 { 2288 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2289 PetscErrorCode ierr; 2290 PetscInt i,*idxb = 0; 2291 PetscScalar *va,*vb; 2292 Vec vtmp; 2293 2294 PetscFunctionBegin; 2295 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2296 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2297 if (idx) { 2298 for (i=0; i<A->rmap->n; i++) { 2299 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2300 } 2301 } 2302 2303 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2304 if (idx) { 2305 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2306 } 2307 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2308 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2309 2310 for (i=0; i<A->rmap->n; i++) { 2311 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2312 va[i] = vb[i]; 2313 if (idx) idx[i] = a->garray[idxb[i]]; 2314 } 2315 } 2316 2317 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2318 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2319 ierr = PetscFree(idxb);CHKERRQ(ierr); 2320 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2321 PetscFunctionReturn(0); 2322 } 2323 2324 #undef __FUNCT__ 2325 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2326 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2327 { 2328 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2329 PetscErrorCode ierr; 2330 PetscInt i,*idxb = 0; 2331 PetscScalar *va,*vb; 2332 Vec vtmp; 2333 2334 PetscFunctionBegin; 2335 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2336 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2337 if (idx) { 2338 for (i=0; i<A->cmap->n; i++) { 2339 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2340 } 2341 } 2342 2343 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2344 if (idx) { 2345 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2346 } 2347 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2348 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2349 2350 for (i=0; i<A->rmap->n; i++) { 2351 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2352 va[i] = vb[i]; 2353 if (idx) idx[i] = a->garray[idxb[i]]; 2354 } 2355 } 2356 2357 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2358 ierr = 
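/*
   Usage sketch for the row max/min magnitude routines above (a minimal example; the optional
   integer array receives the global column index of the extremal entry in each locally owned
   row):

     Vec      rowmax;
     PetscInt m,*loc;
     ierr = MatCreateVecs(A,NULL,&rowmax);CHKERRQ(ierr);
     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = PetscMalloc1(m,&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rowmax,loc);CHKERRQ(ierr);
     ierr = PetscFree(loc);CHKERRQ(ierr);
     ierr = VecDestroy(&rowmax);CHKERRQ(ierr);
*/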
VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2359 ierr = PetscFree(idxb);CHKERRQ(ierr); 2360 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2361 PetscFunctionReturn(0); 2362 } 2363 2364 #undef __FUNCT__ 2365 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2366 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2367 { 2368 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2369 PetscInt n = A->rmap->n; 2370 PetscInt cstart = A->cmap->rstart; 2371 PetscInt *cmap = mat->garray; 2372 PetscInt *diagIdx, *offdiagIdx; 2373 Vec diagV, offdiagV; 2374 PetscScalar *a, *diagA, *offdiagA; 2375 PetscInt r; 2376 PetscErrorCode ierr; 2377 2378 PetscFunctionBegin; 2379 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2380 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2381 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2382 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2383 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2384 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2385 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2386 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2387 for (r = 0; r < n; ++r) { 2388 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2389 a[r] = diagA[r]; 2390 idx[r] = cstart + diagIdx[r]; 2391 } else { 2392 a[r] = offdiagA[r]; 2393 idx[r] = cmap[offdiagIdx[r]]; 2394 } 2395 } 2396 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2397 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2398 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2399 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2400 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2401 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2402 PetscFunctionReturn(0); 2403 } 2404 2405 #undef __FUNCT__ 2406 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2407 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2408 { 2409 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2410 PetscInt n = A->rmap->n; 2411 PetscInt cstart = A->cmap->rstart; 2412 PetscInt *cmap = mat->garray; 2413 PetscInt *diagIdx, *offdiagIdx; 2414 Vec diagV, offdiagV; 2415 PetscScalar *a, *diagA, *offdiagA; 2416 PetscInt r; 2417 PetscErrorCode ierr; 2418 2419 PetscFunctionBegin; 2420 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2421 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2422 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2423 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2424 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2425 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2426 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2427 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2428 for (r = 0; r < n; ++r) { 2429 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2430 a[r] = diagA[r]; 2431 idx[r] = cstart + diagIdx[r]; 2432 } else { 2433 a[r] = offdiagA[r]; 2434 idx[r] = cmap[offdiagIdx[r]]; 2435 } 2436 } 2437 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2438 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2439 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2440 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2441 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2442 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2443 PetscFunctionReturn(0); 2444 } 2445 2446 #undef __FUNCT__ 2447 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2448 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2449 { 2450 PetscErrorCode ierr;
2451 Mat *dummy; 2452 2453 PetscFunctionBegin; 2454 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2455 *newmat = *dummy; 2456 ierr = PetscFree(dummy);CHKERRQ(ierr); 2457 PetscFunctionReturn(0); 2458 } 2459 2460 #undef __FUNCT__ 2461 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2462 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2463 { 2464 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2465 PetscErrorCode ierr; 2466 2467 PetscFunctionBegin; 2468 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2469 A->factorerrortype = a->A->factorerrortype; 2470 PetscFunctionReturn(0); 2471 } 2472 2473 #undef __FUNCT__ 2474 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2475 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2476 { 2477 PetscErrorCode ierr; 2478 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2479 2480 PetscFunctionBegin; 2481 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2482 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2483 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2484 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2485 PetscFunctionReturn(0); 2486 } 2487 2488 #undef __FUNCT__ 2489 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ" 2490 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2491 { 2492 PetscFunctionBegin; 2493 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2494 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2495 PetscFunctionReturn(0); 2496 } 2497 2498 #undef __FUNCT__ 2499 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap" 2500 /*@ 2501 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2502 2503 Collective on Mat 2504 2505 Input Parameters: 2506 + A - the matrix 2507 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2508 2509 Level: advanced 2510 2511 @*/ 2512 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2513 { 2514 PetscErrorCode ierr; 2515 2516 PetscFunctionBegin; 2517 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2518 PetscFunctionReturn(0); 2519 } 2520 2521 #undef __FUNCT__ 2522 #define __FUNCT__ "MatSetFromOptions_MPIAIJ" 2523 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2524 { 2525 PetscErrorCode ierr; 2526 PetscBool sc = PETSC_FALSE,flg; 2527 2528 PetscFunctionBegin; 2529 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2530 ierr = PetscObjectOptionsBegin((PetscObject)A); 2531 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2532 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2533 if (flg) { 2534 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2535 } 2536 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2537 PetscFunctionReturn(0); 2538 } 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "MatShift_MPIAIJ" 2542 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2543 { 2544 PetscErrorCode ierr; 2545 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2546 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2547 2548 PetscFunctionBegin; 2549 if (!Y->preallocated) { 2550 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2551 } else if 
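/*
   Usage sketch for the scalable-overlap switch documented above (a minimal example; nis, isarray
   and ov are hypothetical names for the number of index sets, the index sets themselves, and the
   requested levels of overlap). The same behaviour is available through the option
   -mat_increase_overlap_scalable handled in MatSetFromOptions_MPIAIJ() above:

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,nis,isarray,ov);CHKERRQ(ierr);
*/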
(!aij->nz) { 2552 PetscInt nonew = aij->nonew; 2553 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2554 aij->nonew = nonew; 2555 } 2556 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2557 PetscFunctionReturn(0); 2558 } 2559 2560 #undef __FUNCT__ 2561 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ" 2562 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2563 { 2564 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2565 PetscErrorCode ierr; 2566 2567 PetscFunctionBegin; 2568 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2569 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2570 if (d) { 2571 PetscInt rstart; 2572 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2573 *d += rstart; 2574 2575 } 2576 PetscFunctionReturn(0); 2577 } 2578 2579 2580 /* -------------------------------------------------------------------*/ 2581 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2582 MatGetRow_MPIAIJ, 2583 MatRestoreRow_MPIAIJ, 2584 MatMult_MPIAIJ, 2585 /* 4*/ MatMultAdd_MPIAIJ, 2586 MatMultTranspose_MPIAIJ, 2587 MatMultTransposeAdd_MPIAIJ, 2588 0, 2589 0, 2590 0, 2591 /*10*/ 0, 2592 0, 2593 0, 2594 MatSOR_MPIAIJ, 2595 MatTranspose_MPIAIJ, 2596 /*15*/ MatGetInfo_MPIAIJ, 2597 MatEqual_MPIAIJ, 2598 MatGetDiagonal_MPIAIJ, 2599 MatDiagonalScale_MPIAIJ, 2600 MatNorm_MPIAIJ, 2601 /*20*/ MatAssemblyBegin_MPIAIJ, 2602 MatAssemblyEnd_MPIAIJ, 2603 MatSetOption_MPIAIJ, 2604 MatZeroEntries_MPIAIJ, 2605 /*24*/ MatZeroRows_MPIAIJ, 2606 0, 2607 0, 2608 0, 2609 0, 2610 /*29*/ MatSetUp_MPIAIJ, 2611 0, 2612 0, 2613 MatGetDiagonalBlock_MPIAIJ, 2614 0, 2615 /*34*/ MatDuplicate_MPIAIJ, 2616 0, 2617 0, 2618 0, 2619 0, 2620 /*39*/ MatAXPY_MPIAIJ, 2621 MatGetSubMatrices_MPIAIJ, 2622 MatIncreaseOverlap_MPIAIJ, 2623 MatGetValues_MPIAIJ, 2624 MatCopy_MPIAIJ, 2625 /*44*/ MatGetRowMax_MPIAIJ, 2626 MatScale_MPIAIJ, 2627 MatShift_MPIAIJ, 2628 MatDiagonalSet_MPIAIJ, 2629 MatZeroRowsColumns_MPIAIJ, 2630 /*49*/ MatSetRandom_MPIAIJ, 2631 0, 2632 0, 2633 0, 2634 0, 2635 /*54*/ MatFDColoringCreate_MPIXAIJ, 2636 0, 2637 MatSetUnfactored_MPIAIJ, 2638 MatPermute_MPIAIJ, 2639 0, 2640 /*59*/ MatGetSubMatrix_MPIAIJ, 2641 MatDestroy_MPIAIJ, 2642 MatView_MPIAIJ, 2643 0, 2644 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2645 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2646 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2647 0, 2648 0, 2649 0, 2650 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2651 MatGetRowMinAbs_MPIAIJ, 2652 0, 2653 0, 2654 0, 2655 0, 2656 /*75*/ MatFDColoringApply_AIJ, 2657 MatSetFromOptions_MPIAIJ, 2658 0, 2659 0, 2660 MatFindZeroDiagonals_MPIAIJ, 2661 /*80*/ 0, 2662 0, 2663 0, 2664 /*83*/ MatLoad_MPIAIJ, 2665 0, 2666 0, 2667 0, 2668 0, 2669 0, 2670 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2671 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2672 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2673 MatPtAP_MPIAIJ_MPIAIJ, 2674 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2675 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2676 0, 2677 0, 2678 0, 2679 0, 2680 /*99*/ 0, 2681 0, 2682 0, 2683 MatConjugate_MPIAIJ, 2684 0, 2685 /*104*/MatSetValuesRow_MPIAIJ, 2686 MatRealPart_MPIAIJ, 2687 MatImaginaryPart_MPIAIJ, 2688 0, 2689 0, 2690 /*109*/0, 2691 0, 2692 MatGetRowMin_MPIAIJ, 2693 0, 2694 MatMissingDiagonal_MPIAIJ, 2695 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2696 0, 2697 MatGetGhosts_MPIAIJ, 2698 0, 2699 0, 2700 /*119*/0, 2701 0, 2702 0, 2703 0, 2704 MatGetMultiProcBlock_MPIAIJ, 2705 /*124*/MatFindNonzeroRows_MPIAIJ, 2706 MatGetColumnNorms_MPIAIJ, 2707 MatInvertBlockDiagonal_MPIAIJ, 2708 0, 2709 
MatGetSubMatricesMPI_MPIAIJ, 2710 /*129*/0, 2711 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2712 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2713 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2714 0, 2715 /*134*/0, 2716 0, 2717 0, 2718 0, 2719 0, 2720 /*139*/MatSetBlockSizes_MPIAIJ, 2721 0, 2722 0, 2723 MatFDColoringSetUp_MPIXAIJ, 2724 MatFindOffBlockDiagonalEntries_MPIAIJ, 2725 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2726 }; 2727 2728 /* ----------------------------------------------------------------------------------------*/ 2729 2730 #undef __FUNCT__ 2731 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2732 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2733 { 2734 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2735 PetscErrorCode ierr; 2736 2737 PetscFunctionBegin; 2738 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2739 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2740 PetscFunctionReturn(0); 2741 } 2742 2743 #undef __FUNCT__ 2744 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2745 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2746 { 2747 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2748 PetscErrorCode ierr; 2749 2750 PetscFunctionBegin; 2751 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2752 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2753 PetscFunctionReturn(0); 2754 } 2755 2756 #undef __FUNCT__ 2757 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2758 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2759 { 2760 Mat_MPIAIJ *b; 2761 PetscErrorCode ierr; 2762 2763 PetscFunctionBegin; 2764 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2765 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2766 b = (Mat_MPIAIJ*)B->data; 2767 2768 #if defined(PETSC_USE_CTABLE) 2769 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2770 #else 2771 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2772 #endif 2773 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2774 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2775 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2776 2777 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2778 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2779 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2780 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2781 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2782 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2783 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2784 2785 if (!B->preallocated) { 2786 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2787 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2788 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2789 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2790 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2791 } 2792 2793 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2794 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2795 B->preallocated = PETSC_TRUE; 2796 B->was_assembled = PETSC_FALSE; 2797 B->assembled = PETSC_FALSE;; 2798 PetscFunctionReturn(0); 2799 } 2800 2801 #undef __FUNCT__ 2802 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2803 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2804 { 2805 Mat mat; 2806 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2807 PetscErrorCode ierr; 2808 2809 PetscFunctionBegin; 2810 *newmat = 0; 2811 ierr = 
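/*
   Usage sketch for the preallocation routine implemented above (a minimal example; M and N are
   the global sizes and the per-row counts 5 and 2 are illustrative upper bounds for the diagonal
   and off-diagonal blocks):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/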
MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2812 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2813 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2814 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2815 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2816 a = (Mat_MPIAIJ*)mat->data; 2817 2818 mat->factortype = matin->factortype; 2819 mat->assembled = PETSC_TRUE; 2820 mat->insertmode = NOT_SET_VALUES; 2821 mat->preallocated = PETSC_TRUE; 2822 2823 a->size = oldmat->size; 2824 a->rank = oldmat->rank; 2825 a->donotstash = oldmat->donotstash; 2826 a->roworiented = oldmat->roworiented; 2827 a->rowindices = 0; 2828 a->rowvalues = 0; 2829 a->getrowactive = PETSC_FALSE; 2830 2831 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2832 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2833 2834 if (oldmat->colmap) { 2835 #if defined(PETSC_USE_CTABLE) 2836 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2837 #else 2838 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2839 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2840 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2841 #endif 2842 } else a->colmap = 0; 2843 if (oldmat->garray) { 2844 PetscInt len; 2845 len = oldmat->B->cmap->n; 2846 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2847 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2848 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2849 } else a->garray = 0; 2850 2851 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2852 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2853 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2854 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2855 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2856 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2857 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2858 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2859 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2860 *newmat = mat; 2861 PetscFunctionReturn(0); 2862 } 2863 2864 2865 2866 #undef __FUNCT__ 2867 #define __FUNCT__ "MatLoad_MPIAIJ" 2868 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2869 { 2870 PetscScalar *vals,*svals; 2871 MPI_Comm comm; 2872 PetscErrorCode ierr; 2873 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2874 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2875 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2876 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2877 PetscInt cend,cstart,n,*rowners; 2878 int fd; 2879 PetscInt bs = newMat->rmap->bs; 2880 2881 PetscFunctionBegin; 2882 /* force binary viewer to load .info file if it has not yet done so */ 2883 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2884 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2885 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2886 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2887 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2888 if (!rank) { 2889 ierr = 
PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2890 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2891 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2892 } 2893 2894 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2895 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2896 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2897 if (bs < 0) bs = 1; 2898 2899 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2900 M = header[1]; N = header[2]; 2901 2902 /* If global sizes are set, check if they are consistent with that given in the file */ 2903 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2904 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2905 2906 /* determine ownership of all (block) rows */ 2907 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2908 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2909 else m = newMat->rmap->n; /* Set by user */ 2910 2911 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2912 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2913 2914 /* First process needs enough room for process with most rows */ 2915 if (!rank) { 2916 mmax = rowners[1]; 2917 for (i=2; i<=size; i++) { 2918 mmax = PetscMax(mmax, rowners[i]); 2919 } 2920 } else mmax = -1; /* unused, but compilers complain */ 2921 2922 rowners[0] = 0; 2923 for (i=2; i<=size; i++) { 2924 rowners[i] += rowners[i-1]; 2925 } 2926 rstart = rowners[rank]; 2927 rend = rowners[rank+1]; 2928 2929 /* distribute row lengths to all processors */ 2930 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2931 if (!rank) { 2932 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2933 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2934 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2935 for (j=0; j<m; j++) { 2936 procsnz[0] += ourlens[j]; 2937 } 2938 for (i=1; i<size; i++) { 2939 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2940 /* calculate the number of nonzeros on each processor */ 2941 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2942 procsnz[i] += rowlengths[j]; 2943 } 2944 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2945 } 2946 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2947 } else { 2948 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2949 } 2950 2951 if (!rank) { 2952 /* determine max buffer needed and allocate it */ 2953 maxnz = 0; 2954 for (i=0; i<size; i++) { 2955 maxnz = PetscMax(maxnz,procsnz[i]); 2956 } 2957 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2958 2959 /* read in my part of the matrix column indices */ 2960 nz = procsnz[0]; 2961 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2962 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2963 2964 /* read in every one elses and ship off */ 2965 for (i=1; i<size; i++) { 
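/*
   Usage sketch for loading a matrix with the reader implemented here (a minimal example; the
   file name is illustrative, and -matload_block_size sets the block size used for the loaded
   matrix, as parsed above):

     Mat         B;
     PetscViewer bview;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"amat.dat",FILE_MODE_READ,&bview);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(B);CHKERRQ(ierr);
     ierr = MatLoad(B,bview);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&bview);CHKERRQ(ierr);
*/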
2966 nz = procsnz[i]; 2967 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2968 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2969 } 2970 ierr = PetscFree(cols);CHKERRQ(ierr); 2971 } else { 2972 /* determine buffer space needed for message */ 2973 nz = 0; 2974 for (i=0; i<m; i++) { 2975 nz += ourlens[i]; 2976 } 2977 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2978 2979 /* receive message of column indices*/ 2980 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2981 } 2982 2983 /* determine column ownership if matrix is not square */ 2984 if (N != M) { 2985 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2986 else n = newMat->cmap->n; 2987 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2988 cstart = cend - n; 2989 } else { 2990 cstart = rstart; 2991 cend = rend; 2992 n = cend - cstart; 2993 } 2994 2995 /* loop over local rows, determining number of off diagonal entries */ 2996 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2997 jj = 0; 2998 for (i=0; i<m; i++) { 2999 for (j=0; j<ourlens[i]; j++) { 3000 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3001 jj++; 3002 } 3003 } 3004 3005 for (i=0; i<m; i++) { 3006 ourlens[i] -= offlens[i]; 3007 } 3008 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3009 3010 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3011 3012 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3013 3014 for (i=0; i<m; i++) { 3015 ourlens[i] += offlens[i]; 3016 } 3017 3018 if (!rank) { 3019 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3020 3021 /* read in my part of the matrix numerical values */ 3022 nz = procsnz[0]; 3023 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3024 3025 /* insert into matrix */ 3026 jj = rstart; 3027 smycols = mycols; 3028 svals = vals; 3029 for (i=0; i<m; i++) { 3030 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3031 smycols += ourlens[i]; 3032 svals += ourlens[i]; 3033 jj++; 3034 } 3035 3036 /* read in other processors and ship out */ 3037 for (i=1; i<size; i++) { 3038 nz = procsnz[i]; 3039 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3040 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3041 } 3042 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3043 } else { 3044 /* receive numeric values */ 3045 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3046 3047 /* receive message of values*/ 3048 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3049 3050 /* insert into matrix */ 3051 jj = rstart; 3052 smycols = mycols; 3053 svals = vals; 3054 for (i=0; i<m; i++) { 3055 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3056 smycols += ourlens[i]; 3057 svals += ourlens[i]; 3058 jj++; 3059 } 3060 } 3061 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3062 ierr = PetscFree(vals);CHKERRQ(ierr); 3063 ierr = PetscFree(mycols);CHKERRQ(ierr); 3064 ierr = PetscFree(rowners);CHKERRQ(ierr); 3065 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3066 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3067 PetscFunctionReturn(0); 3068 } 3069 3070 #undef __FUNCT__ 3071 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3072 /* TODO: Not scalable because of ISAllGather() unless getting all columns. 
*/ 3073 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3074 { 3075 PetscErrorCode ierr; 3076 IS iscol_local; 3077 PetscInt csize; 3078 3079 PetscFunctionBegin; 3080 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3081 if (call == MAT_REUSE_MATRIX) { 3082 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3083 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3084 } else { 3085 /* check if we are grabbing all columns*/ 3086 PetscBool isstride; 3087 PetscMPIInt lisstride = 0,gisstride; 3088 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3089 if (isstride) { 3090 PetscInt start,len,mstart,mlen; 3091 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3092 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3093 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3094 if (mstart == start && mlen-mstart == len) lisstride = 1; 3095 } 3096 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3097 if (gisstride) { 3098 PetscInt N; 3099 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3100 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3101 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3102 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3103 } else { 3104 PetscInt cbs; 3105 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3106 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3107 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3108 } 3109 } 3110 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3111 if (call == MAT_INITIAL_MATRIX) { 3112 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3113 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3114 } 3115 PetscFunctionReturn(0); 3116 } 3117 3118 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3119 #undef __FUNCT__ 3120 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3121 /* 3122 Not great since it makes two copies of the submatrix, first an SeqAIJ 3123 in local and then by concatenating the local matrices the end result. 3124 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3125 3126 Note: This requires a sequential iscol with all indices. 
3127 */ 3128 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3129 { 3130 PetscErrorCode ierr; 3131 PetscMPIInt rank,size; 3132 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3133 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3134 PetscBool allcolumns, colflag; 3135 Mat M,Mreuse; 3136 MatScalar *vwork,*aa; 3137 MPI_Comm comm; 3138 Mat_SeqAIJ *aij; 3139 3140 PetscFunctionBegin; 3141 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3142 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3143 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3144 3145 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3146 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3147 if (colflag && ncol == mat->cmap->N) { 3148 allcolumns = PETSC_TRUE; 3149 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3150 } else { 3151 allcolumns = PETSC_FALSE; 3152 } 3153 if (call == MAT_REUSE_MATRIX) { 3154 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3155 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3156 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3157 } else { 3158 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3159 } 3160 3161 /* 3162 m - number of local rows 3163 n - number of columns (same on all processors) 3164 rstart - first row in new global matrix generated 3165 */ 3166 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3167 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3168 if (call == MAT_INITIAL_MATRIX) { 3169 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3170 ii = aij->i; 3171 jj = aij->j; 3172 3173 /* 3174 Determine the number of non-zeros in the diagonal and off-diagonal 3175 portions of the matrix in order to do correct preallocation 3176 */ 3177 3178 /* first get start and end of "diagonal" columns */ 3179 if (csize == PETSC_DECIDE) { 3180 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3181 if (mglobal == n) { /* square matrix */ 3182 nlocal = m; 3183 } else { 3184 nlocal = n/size + ((n % size) > rank); 3185 } 3186 } else { 3187 nlocal = csize; 3188 } 3189 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3190 rstart = rend - nlocal; 3191 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3192 3193 /* next, compute all the lengths */ 3194 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3195 olens = dlens + m; 3196 for (i=0; i<m; i++) { 3197 jend = ii[i+1] - ii[i]; 3198 olen = 0; 3199 dlen = 0; 3200 for (j=0; j<jend; j++) { 3201 if (*jj < rstart || *jj >= rend) olen++; 3202 else dlen++; 3203 jj++; 3204 } 3205 olens[i] = olen; 3206 dlens[i] = dlen; 3207 } 3208 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3209 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3210 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3211 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3212 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3213 ierr = PetscFree(dlens);CHKERRQ(ierr); 3214 } else { 3215 PetscInt ml,nl; 3216 3217 M = *newmat; 3218 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3219 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same 
size/layout as request");
3220 ierr = MatZeroEntries(M);CHKERRQ(ierr);
3221 /*
3222 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3223 rather than the slower MatSetValues().
3224 */
3225 M->was_assembled = PETSC_TRUE;
3226 M->assembled = PETSC_FALSE;
3227 }
3228 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3229 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3230 ii = aij->i;
3231 jj = aij->j;
3232 aa = aij->a;
3233 for (i=0; i<m; i++) {
3234 row = rstart + i;
3235 nz = ii[i+1] - ii[i];
3236 cwork = jj; jj += nz;
3237 vwork = aa; aa += nz;
3238 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3239 }
3240
3241 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3242 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3243 *newmat = M;
3244
3245 /* save submatrix used in processor for next request */
3246 if (call == MAT_INITIAL_MATRIX) {
3247 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3248 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3249 }
3250 PetscFunctionReturn(0);
3251 }
3252
3253 #undef __FUNCT__
3254 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3255 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3256 {
3257 PetscInt m,cstart, cend,j,nnz,i,d;
3258 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3259 const PetscInt *JJ;
3260 PetscScalar *values;
3261 PetscErrorCode ierr;
3262
3263 PetscFunctionBegin;
3264 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3265
3266 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3267 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3268 m = B->rmap->n;
3269 cstart = B->cmap->rstart;
3270 cend = B->cmap->rend;
3271 rstart = B->rmap->rstart;
3272
3273 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3274
3275 #if defined(PETSC_USE_DEBUG)
3276 for (i=0; i<m; i++) {
3277 nnz = Ii[i+1]- Ii[i];
3278 JJ = J + Ii[i];
3279 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3280 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3281 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3282 }
3283 #endif
3284
3285 for (i=0; i<m; i++) {
3286 nnz = Ii[i+1]- Ii[i];
3287 JJ = J + Ii[i];
3288 nnz_max = PetscMax(nnz_max,nnz);
3289 d = 0;
3290 for (j=0; j<nnz; j++) {
3291 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3292 }
3293 d_nnz[i] = d;
3294 o_nnz[i] = nnz - d;
3295 }
3296 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3297 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3298
3299 if (v) values = (PetscScalar*)v;
3300 else {
3301 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3302 }
3303
3304 for (i=0; i<m; i++) {
3305 ii = i + rstart;
3306 nnz = Ii[i+1]- Ii[i];
3307 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3308 } 3309 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3310 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3311 3312 if (!v) { 3313 ierr = PetscFree(values);CHKERRQ(ierr); 3314 } 3315 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3316 PetscFunctionReturn(0); 3317 } 3318 3319 #undef __FUNCT__ 3320 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3321 /*@ 3322 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3323 (the default parallel PETSc format). 3324 3325 Collective on MPI_Comm 3326 3327 Input Parameters: 3328 + B - the matrix 3329 . i - the indices into j for the start of each local row (starts with zero) 3330 . j - the column indices for each local row (starts with zero) 3331 - v - optional values in the matrix 3332 3333 Level: developer 3334 3335 Notes: 3336 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3337 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3338 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3339 3340 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3341 3342 The format which is used for the sparse matrix input, is equivalent to a 3343 row-major ordering.. i.e for the following matrix, the input data expected is 3344 as shown 3345 3346 $ 1 0 0 3347 $ 2 0 3 P0 3348 $ ------- 3349 $ 4 5 6 P1 3350 $ 3351 $ Process0 [P0]: rows_owned=[0,1] 3352 $ i = {0,1,3} [size = nrow+1 = 2+1] 3353 $ j = {0,0,2} [size = 3] 3354 $ v = {1,2,3} [size = 3] 3355 $ 3356 $ Process1 [P1]: rows_owned=[2] 3357 $ i = {0,3} [size = nrow+1 = 1+1] 3358 $ j = {0,1,2} [size = 3] 3359 $ v = {4,5,6} [size = 3] 3360 3361 .keywords: matrix, aij, compressed row, sparse, parallel 3362 3363 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3364 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3365 @*/ 3366 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3367 { 3368 PetscErrorCode ierr; 3369 3370 PetscFunctionBegin; 3371 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3372 PetscFunctionReturn(0); 3373 } 3374 3375 #undef __FUNCT__ 3376 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3377 /*@C 3378 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3379 (the default parallel PETSc format). For good matrix assembly performance 3380 the user should preallocate the matrix storage by setting the parameters 3381 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3382 performance can be increased by more than a factor of 50. 3383 3384 Collective on MPI_Comm 3385 3386 Input Parameters: 3387 + B - the matrix 3388 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3389 (same value is used for all local rows) 3390 . d_nnz - array containing the number of nonzeros in the various rows of the 3391 DIAGONAL portion of the local submatrix (possibly different for each row) 3392 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3393 The size of this array is equal to the number of local rows, i.e 'm'. 
3394 For matrices that will be factored, you must leave room for (and set) 3395 the diagonal entry even if it is zero. 3396 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3397 submatrix (same value is used for all local rows). 3398 - o_nnz - array containing the number of nonzeros in the various rows of the 3399 OFF-DIAGONAL portion of the local submatrix (possibly different for 3400 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3401 structure. The size of this array is equal to the number 3402 of local rows, i.e 'm'. 3403 3404 If the *_nnz parameter is given then the *_nz parameter is ignored 3405 3406 The AIJ format (also called the Yale sparse matrix format or 3407 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3408 storage. The stored row and column indices begin with zero. 3409 See Users-Manual: ch_mat for details. 3410 3411 The parallel matrix is partitioned such that the first m0 rows belong to 3412 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3413 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3414 3415 The DIAGONAL portion of the local submatrix of a processor can be defined 3416 as the submatrix which is obtained by extraction the part corresponding to 3417 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3418 first row that belongs to the processor, r2 is the last row belonging to 3419 the this processor, and c1-c2 is range of indices of the local part of a 3420 vector suitable for applying the matrix to. This is an mxn matrix. In the 3421 common case of a square matrix, the row and column ranges are the same and 3422 the DIAGONAL part is also square. The remaining portion of the local 3423 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3424 3425 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3426 3427 You can call MatGetInfo() to get information on how effective the preallocation was; 3428 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3429 You can also run with the option -info and look for messages with the string 3430 malloc in them to see if additional memory allocation was needed. 3431 3432 Example usage: 3433 3434 Consider the following 8x8 matrix with 34 non-zero values, that is 3435 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3436 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3437 as follows: 3438 3439 .vb 3440 1 2 0 | 0 3 0 | 0 4 3441 Proc0 0 5 6 | 7 0 0 | 8 0 3442 9 0 10 | 11 0 0 | 12 0 3443 ------------------------------------- 3444 13 0 14 | 15 16 17 | 0 0 3445 Proc1 0 18 0 | 19 20 21 | 0 0 3446 0 0 0 | 22 23 0 | 24 0 3447 ------------------------------------- 3448 Proc2 25 26 27 | 0 0 28 | 29 0 3449 30 0 0 | 31 32 33 | 0 34 3450 .ve 3451 3452 This can be represented as a collection of submatrices as: 3453 3454 .vb 3455 A B C 3456 D E F 3457 G H I 3458 .ve 3459 3460 Where the submatrices A,B,C are owned by proc0, D,E,F are 3461 owned by proc1, G,H,I are owned by proc2. 3462 3463 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3464 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3465 The 'M','N' parameters are 8,8, and have the same values on all procs. 3466 3467 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3468 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3469 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3470 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3471 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3472 matrix, ans [DF] as another SeqAIJ matrix. 3473 3474 When d_nz, o_nz parameters are specified, d_nz storage elements are 3475 allocated for every row of the local diagonal submatrix, and o_nz 3476 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3477 One way to choose d_nz and o_nz is to use the max nonzerors per local 3478 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3479 In this case, the values of d_nz,o_nz are: 3480 .vb 3481 proc0 : dnz = 2, o_nz = 2 3482 proc1 : dnz = 3, o_nz = 2 3483 proc2 : dnz = 1, o_nz = 4 3484 .ve 3485 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3486 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3487 for proc3. i.e we are using 12+15+10=37 storage locations to store 3488 34 values. 3489 3490 When d_nnz, o_nnz parameters are specified, the storage is specified 3491 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3492 In the above case the values for d_nnz,o_nnz are: 3493 .vb 3494 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3495 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3496 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3497 .ve 3498 Here the space allocated is sum of all the above values i.e 34, and 3499 hence pre-allocation is perfect. 3500 3501 Level: intermediate 3502 3503 .keywords: matrix, aij, compressed row, sparse, parallel 3504 3505 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3506 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 3507 @*/ 3508 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3509 { 3510 PetscErrorCode ierr; 3511 3512 PetscFunctionBegin; 3513 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3514 PetscValidType(B,1); 3515 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3516 PetscFunctionReturn(0); 3517 } 3518 3519 #undef __FUNCT__ 3520 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3521 /*@ 3522 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3523 CSR format the local rows. 3524 3525 Collective on MPI_Comm 3526 3527 Input Parameters: 3528 + comm - MPI communicator 3529 . m - number of local rows (Cannot be PETSC_DECIDE) 3530 . n - This value should be the same as the local size used in creating the 3531 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3532 calculated if N is given) For square matrices n is almost always m. 3533 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3534 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3535 . i - row indices 3536 . j - column indices 3537 - a - matrix values 3538 3539 Output Parameter: 3540 . mat - the matrix 3541 3542 Level: intermediate 3543 3544 Notes: 3545 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3546 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3547 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3548 3549 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
3550 3551 The format which is used for the sparse matrix input, is equivalent to a 3552 row-major ordering.. i.e for the following matrix, the input data expected is 3553 as shown 3554 3555 $ 1 0 0 3556 $ 2 0 3 P0 3557 $ ------- 3558 $ 4 5 6 P1 3559 $ 3560 $ Process0 [P0]: rows_owned=[0,1] 3561 $ i = {0,1,3} [size = nrow+1 = 2+1] 3562 $ j = {0,0,2} [size = 3] 3563 $ v = {1,2,3} [size = 3] 3564 $ 3565 $ Process1 [P1]: rows_owned=[2] 3566 $ i = {0,3} [size = nrow+1 = 1+1] 3567 $ j = {0,1,2} [size = 3] 3568 $ v = {4,5,6} [size = 3] 3569 3570 .keywords: matrix, aij, compressed row, sparse, parallel 3571 3572 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3573 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3574 @*/ 3575 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3576 { 3577 PetscErrorCode ierr; 3578 3579 PetscFunctionBegin; 3580 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3581 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3582 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3583 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3584 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3585 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3586 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3587 PetscFunctionReturn(0); 3588 } 3589 3590 #undef __FUNCT__ 3591 #define __FUNCT__ "MatCreateAIJ" 3592 /*@C 3593 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3594 (the default parallel PETSc format). For good matrix assembly performance 3595 the user should preallocate the matrix storage by setting the parameters 3596 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3597 performance can be increased by more than a factor of 50. 3598 3599 Collective on MPI_Comm 3600 3601 Input Parameters: 3602 + comm - MPI communicator 3603 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3604 This value should be the same as the local size used in creating the 3605 y vector for the matrix-vector product y = Ax. 3606 . n - This value should be the same as the local size used in creating the 3607 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3608 calculated if N is given) For square matrices n is almost always m. 3609 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3610 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3611 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3612 (same value is used for all local rows) 3613 . d_nnz - array containing the number of nonzeros in the various rows of the 3614 DIAGONAL portion of the local submatrix (possibly different for each row) 3615 or NULL, if d_nz is used to specify the nonzero structure. 3616 The size of this array is equal to the number of local rows, i.e 'm'. 3617 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3618 submatrix (same value is used for all local rows). 3619 - o_nnz - array containing the number of nonzeros in the various rows of the 3620 OFF-DIAGONAL portion of the local submatrix (possibly different for 3621 each row) or NULL, if o_nz is used to specify the nonzero 3622 structure. 
The size of this array is equal to the number 3623 of local rows, i.e 'm'. 3624 3625 Output Parameter: 3626 . A - the matrix 3627 3628 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3629 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3630 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3631 3632 Notes: 3633 If the *_nnz parameter is given then the *_nz parameter is ignored 3634 3635 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3636 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3637 storage requirements for this matrix. 3638 3639 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3640 processor than it must be used on all processors that share the object for 3641 that argument. 3642 3643 The user MUST specify either the local or global matrix dimensions 3644 (possibly both). 3645 3646 The parallel matrix is partitioned across processors such that the 3647 first m0 rows belong to process 0, the next m1 rows belong to 3648 process 1, the next m2 rows belong to process 2 etc.. where 3649 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3650 values corresponding to [m x N] submatrix. 3651 3652 The columns are logically partitioned with the n0 columns belonging 3653 to 0th partition, the next n1 columns belonging to the next 3654 partition etc.. where n0,n1,n2... are the input parameter 'n'. 3655 3656 The DIAGONAL portion of the local submatrix on any given processor 3657 is the submatrix corresponding to the rows and columns m,n 3658 corresponding to the given processor. i.e diagonal matrix on 3659 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3660 etc. The remaining portion of the local submatrix [m x (N-n)] 3661 constitute the OFF-DIAGONAL portion. The example below better 3662 illustrates this concept. 3663 3664 For a square global matrix we define each processor's diagonal portion 3665 to be its local rows and the corresponding columns (a square submatrix); 3666 each processor's off-diagonal portion encompasses the remainder of the 3667 local matrix (a rectangular submatrix). 3668 3669 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3670 3671 When calling this routine with a single process communicator, a matrix of 3672 type SEQAIJ is returned. If a matrix of type MATMPIAIJ is desired for this 3673 type of communicator, use the construction mechanism: 3674 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3675 3676 By default, this format uses inodes (identical nodes) when possible. 3677 We search for consecutive rows with the same nonzero structure, thereby 3678 reusing matrix information to achieve increased efficiency. 3679 3680 Options Database Keys: 3681 + -mat_no_inode - Do not use inodes 3682 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3683 - -mat_aij_oneindex - Internally use indexing starting at 1 3684 rather than 0. Note that when calling MatSetValues(), 3685 the user still MUST index entries starting at 0! 3686 3687 3688 Example usage: 3689 3690 Consider the following 8x8 matrix with 34 non-zero values, that is 3691 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3692 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
3693 as follows:
3694
3695 .vb
3696 1 2 0 | 0 3 0 | 0 4
3697 Proc0 0 5 6 | 7 0 0 | 8 0
3698 9 0 10 | 11 0 0 | 12 0
3699 -------------------------------------
3700 13 0 14 | 15 16 17 | 0 0
3701 Proc1 0 18 0 | 19 20 21 | 0 0
3702 0 0 0 | 22 23 0 | 24 0
3703 -------------------------------------
3704 Proc2 25 26 27 | 0 0 28 | 29 0
3705 30 0 0 | 31 32 33 | 0 34
3706 .ve
3707
3708 This can be represented as a collection of submatrices as:
3709
3710 .vb
3711 A B C
3712 D E F
3713 G H I
3714 .ve
3715
3716 Where the submatrices A,B,C are owned by proc0, D,E,F are
3717 owned by proc1, G,H,I are owned by proc2.
3718
3719 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3720 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3721 The 'M','N' parameters are 8,8, and have the same values on all procs.
3722
3723 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3724 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3725 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3726 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3727 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3728 matrix and [DF] as another SeqAIJ matrix.
3729
3730 When the d_nz, o_nz parameters are specified, d_nz storage elements are
3731 allocated for every row of the local DIAGONAL submatrix, and o_nz
3732 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3733 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3734 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3735 In this case, the values of d_nz,o_nz are:
3736 .vb
3737 proc0 : d_nz = 2, o_nz = 2
3738 proc1 : d_nz = 3, o_nz = 2
3739 proc2 : d_nz = 1, o_nz = 4
3740 .ve
3741 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3742 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3743 for proc2, i.e., we are using 12+15+10=37 storage locations to store
3744 34 values.
3745
3746 When the d_nnz, o_nnz parameters are specified, the storage is specified
3747 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3748 In the above case the values for d_nnz,o_nnz are:
3749 .vb
3750 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3751 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3752 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3753 .ve
3754 Here the space allocated is the sum of all the above values, i.e., 34, and
3755 hence preallocation is perfect.
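   A minimal calling sketch for the 8x8 example above (a hypothetical driver fragment,
   not taken from a PETSc example; the error codes returned by the calls are ignored and
   the MatSetValues() loop that fills the locally owned rows is only indicated):

.vb
   Mat            A;
   PetscMPIInt    rank;
   PetscInt       m;
   const PetscInt d0[] = {2,2,2}, o0[] = {2,2,2};   /* proc0 */
   const PetscInt d1[] = {3,3,2}, o1[] = {2,1,1};   /* proc1 */
   const PetscInt d2[] = {1,1},   o2[] = {4,4};     /* proc2 */
   const PetscInt *d_nnz,*o_nnz;

   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
   if (rank == 0)      {m = 3; d_nnz = d0; o_nnz = o0;}
   else if (rank == 1) {m = 3; d_nnz = d1; o_nnz = o1;}
   else                {m = 2; d_nnz = d2; o_nnz = o2;}
   MatCreateAIJ(PETSC_COMM_WORLD,m,m,8,8,0,d_nnz,0,o_nnz,&A);
   /* ... MatSetValues() on the locally owned rows, then ... */
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
   MatDestroy(&A);
.ve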
3756 3757 Level: intermediate 3758 3759 .keywords: matrix, aij, compressed row, sparse, parallel 3760 3761 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3762 MATMPIAIJ, MatCreateMPIAIJWithArrays() 3763 @*/ 3764 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3765 { 3766 PetscErrorCode ierr; 3767 PetscMPIInt size; 3768 3769 PetscFunctionBegin; 3770 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3771 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3772 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3773 if (size > 1) { 3774 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3775 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3776 } else { 3777 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3778 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3779 } 3780 PetscFunctionReturn(0); 3781 } 3782 3783 #undef __FUNCT__ 3784 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3785 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3786 { 3787 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3788 PetscBool flg; 3789 PetscErrorCode ierr; 3790 3791 PetscFunctionBegin; 3792 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 3793 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 3794 if (Ad) *Ad = a->A; 3795 if (Ao) *Ao = a->B; 3796 if (colmap) *colmap = a->garray; 3797 PetscFunctionReturn(0); 3798 } 3799 3800 #undef __FUNCT__ 3801 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3802 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3803 { 3804 PetscErrorCode ierr; 3805 PetscInt m,N,i,rstart,nnz,Ii; 3806 PetscInt *indx; 3807 PetscScalar *values; 3808 3809 PetscFunctionBegin; 3810 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3811 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3812 PetscInt *dnz,*onz,sum,bs,cbs; 3813 3814 if (n == PETSC_DECIDE) { 3815 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3816 } 3817 /* Check sum(n) = N */ 3818 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3819 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3820 3821 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3822 rstart -= m; 3823 3824 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3825 for (i=0; i<m; i++) { 3826 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3827 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3828 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3829 } 3830 3831 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3832 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3833 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3834 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3835 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3836 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3837 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3838 } 3839 3840 /* numeric phase */ 3841 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3842 for (i=0; i<m; i++) { 3843 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3844 Ii = i + rstart; 3845 
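/* copy row i of the local sequential matrix into global row Ii (= i + rstart) of the concatenated parallel matrix */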
ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3846 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3847 } 3848 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3849 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3850 PetscFunctionReturn(0); 3851 } 3852 3853 #undef __FUNCT__ 3854 #define __FUNCT__ "MatFileSplit" 3855 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3856 { 3857 PetscErrorCode ierr; 3858 PetscMPIInt rank; 3859 PetscInt m,N,i,rstart,nnz; 3860 size_t len; 3861 const PetscInt *indx; 3862 PetscViewer out; 3863 char *name; 3864 Mat B; 3865 const PetscScalar *values; 3866 3867 PetscFunctionBegin; 3868 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3869 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3870 /* Should this be the type of the diagonal block of A? */ 3871 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3872 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3873 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3874 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3875 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3876 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3877 for (i=0; i<m; i++) { 3878 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3879 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3880 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3881 } 3882 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3883 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3884 3885 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3886 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3887 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3888 sprintf(name,"%s.%d",outfile,rank); 3889 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3890 ierr = PetscFree(name);CHKERRQ(ierr); 3891 ierr = MatView(B,out);CHKERRQ(ierr); 3892 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3893 ierr = MatDestroy(&B);CHKERRQ(ierr); 3894 PetscFunctionReturn(0); 3895 } 3896 3897 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3898 #undef __FUNCT__ 3899 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3900 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3901 { 3902 PetscErrorCode ierr; 3903 Mat_Merge_SeqsToMPI *merge; 3904 PetscContainer container; 3905 3906 PetscFunctionBegin; 3907 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3908 if (container) { 3909 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3910 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3911 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3912 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3913 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3914 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3915 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3916 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3917 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3918 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3919 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3920 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3921 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3922 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3923 ierr = PetscFree(merge);CHKERRQ(ierr); 3924 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3925 } 3926 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3927 PetscFunctionReturn(0); 3928 } 3929 3930 
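/*
   The two routines below implement the symbolic and numeric phases used by
   MatCreateMPIAIJSumSeqAIJ() further down.  A hypothetical calling sequence
   (a sketch only; error checking omitted) is

      Mat seqmat,mpimat;
      ... assemble seqmat, which must have the same global size on every process ...
      MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
      ... change the numerical values of seqmat, keeping its nonzero pattern ...
      MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);

   The first call does the symbolic set-up and attaches the Mat_Merge_SeqsToMPI data to
   mpimat; the second call reuses that data and repeats only the numerical assembly.
*/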
#include <../src/mat/utils/freespace.h> 3931 #include <petscbt.h> 3932 3933 #undef __FUNCT__ 3934 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3935 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3936 { 3937 PetscErrorCode ierr; 3938 MPI_Comm comm; 3939 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3940 PetscMPIInt size,rank,taga,*len_s; 3941 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3942 PetscInt proc,m; 3943 PetscInt **buf_ri,**buf_rj; 3944 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3945 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3946 MPI_Request *s_waits,*r_waits; 3947 MPI_Status *status; 3948 MatScalar *aa=a->a; 3949 MatScalar **abuf_r,*ba_i; 3950 Mat_Merge_SeqsToMPI *merge; 3951 PetscContainer container; 3952 3953 PetscFunctionBegin; 3954 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3955 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3956 3957 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3958 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3959 3960 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3961 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3962 3963 bi = merge->bi; 3964 bj = merge->bj; 3965 buf_ri = merge->buf_ri; 3966 buf_rj = merge->buf_rj; 3967 3968 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 3969 owners = merge->rowmap->range; 3970 len_s = merge->len_s; 3971 3972 /* send and recv matrix values */ 3973 /*-----------------------------*/ 3974 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3975 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3976 3977 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 3978 for (proc=0,k=0; proc<size; proc++) { 3979 if (!len_s[proc]) continue; 3980 i = owners[proc]; 3981 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3982 k++; 3983 } 3984 3985 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3986 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3987 ierr = PetscFree(status);CHKERRQ(ierr); 3988 3989 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3990 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3991 3992 /* insert mat values of mpimat */ 3993 /*----------------------------*/ 3994 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 3995 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 3996 3997 for (k=0; k<merge->nrecv; k++) { 3998 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3999 nrows = *(buf_ri_k[k]); 4000 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4001 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4002 } 4003 4004 /* set values of ba */ 4005 m = merge->rowmap->n; 4006 for (i=0; i<m; i++) { 4007 arow = owners[rank] + i; 4008 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4009 bnzi = bi[i+1] - bi[i]; 4010 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4011 4012 /* add local non-zero vals of this proc's seqmat into ba */ 4013 anzi = ai[arow+1] - ai[arow]; 4014 aj = a->j + ai[arow]; 4015 aa = a->a + ai[arow]; 4016 nextaj = 0; 4017 for (j=0; nextaj<anzi; j++) { 4018 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4019 ba_i[j] += aa[nextaj++]; 4020 } 4021 } 4022 4023 /* add received 
vals into ba */ 4024 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4025 /* i-th row */ 4026 if (i == *nextrow[k]) { 4027 anzi = *(nextai[k]+1) - *nextai[k]; 4028 aj = buf_rj[k] + *(nextai[k]); 4029 aa = abuf_r[k] + *(nextai[k]); 4030 nextaj = 0; 4031 for (j=0; nextaj<anzi; j++) { 4032 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4033 ba_i[j] += aa[nextaj++]; 4034 } 4035 } 4036 nextrow[k]++; nextai[k]++; 4037 } 4038 } 4039 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4040 } 4041 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4042 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4043 4044 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4045 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4046 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4047 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4048 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4049 PetscFunctionReturn(0); 4050 } 4051 4052 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4053 4054 #undef __FUNCT__ 4055 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4056 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4057 { 4058 PetscErrorCode ierr; 4059 Mat B_mpi; 4060 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4061 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4062 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4063 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4064 PetscInt len,proc,*dnz,*onz,bs,cbs; 4065 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4066 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4067 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4068 MPI_Status *status; 4069 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4070 PetscBT lnkbt; 4071 Mat_Merge_SeqsToMPI *merge; 4072 PetscContainer container; 4073 4074 PetscFunctionBegin; 4075 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4076 4077 /* make sure it is a PETSc comm */ 4078 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4079 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4080 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4081 4082 ierr = PetscNew(&merge);CHKERRQ(ierr); 4083 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4084 4085 /* determine row ownership */ 4086 /*---------------------------------------------------------*/ 4087 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4088 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4089 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4090 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4091 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4092 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4093 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4094 4095 m = merge->rowmap->n; 4096 owners = merge->rowmap->range; 4097 4098 /* determine the number of messages to send, their lengths */ 4099 /*---------------------------------------------------------*/ 4100 len_s = merge->len_s; 4101 4102 len = 0; /* length of buf_si[] */ 4103 merge->nsend = 0; 4104 for (proc=0; proc<size; proc++) { 4105 len_si[proc] = 0; 4106 if (proc == rank) { 4107 len_s[proc] = 0; 4108 } else { 4109 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4110 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4111 } 4112 if (len_s[proc]) { 4113 merge->nsend++; 4114 nrows = 0; 4115 for 
(i=owners[proc]; i<owners[proc+1]; i++) { 4116 if (ai[i+1] > ai[i]) nrows++; 4117 } 4118 len_si[proc] = 2*(nrows+1); 4119 len += len_si[proc]; 4120 } 4121 } 4122 4123 /* determine the number and length of messages to receive for ij-structure */ 4124 /*-------------------------------------------------------------------------*/ 4125 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4126 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4127 4128 /* post the Irecv of j-structure */ 4129 /*-------------------------------*/ 4130 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4131 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4132 4133 /* post the Isend of j-structure */ 4134 /*--------------------------------*/ 4135 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4136 4137 for (proc=0, k=0; proc<size; proc++) { 4138 if (!len_s[proc]) continue; 4139 i = owners[proc]; 4140 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4141 k++; 4142 } 4143 4144 /* receives and sends of j-structure are complete */ 4145 /*------------------------------------------------*/ 4146 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4147 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4148 4149 /* send and recv i-structure */ 4150 /*---------------------------*/ 4151 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4152 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4153 4154 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4155 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4156 for (proc=0,k=0; proc<size; proc++) { 4157 if (!len_s[proc]) continue; 4158 /* form outgoing message for i-structure: 4159 buf_si[0]: nrows to be sent 4160 [1:nrows]: row index (global) 4161 [nrows+1:2*nrows+1]: i-structure index 4162 */ 4163 /*-------------------------------------------*/ 4164 nrows = len_si[proc]/2 - 1; 4165 buf_si_i = buf_si + nrows+1; 4166 buf_si[0] = nrows; 4167 buf_si_i[0] = 0; 4168 nrows = 0; 4169 for (i=owners[proc]; i<owners[proc+1]; i++) { 4170 anzi = ai[i+1] - ai[i]; 4171 if (anzi) { 4172 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4173 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4174 nrows++; 4175 } 4176 } 4177 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4178 k++; 4179 buf_si += len_si[proc]; 4180 } 4181 4182 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4183 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4184 4185 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4186 for (i=0; i<merge->nrecv; i++) { 4187 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4188 } 4189 4190 ierr = PetscFree(len_si);CHKERRQ(ierr); 4191 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4192 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4193 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4194 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4195 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4196 ierr = PetscFree(status);CHKERRQ(ierr); 4197 4198 /* compute a local seq matrix in each processor */ 4199 
/*----------------------------------------------*/
4200 /* allocate bi array and free space for accumulating nonzero column info */
4201 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4202 bi[0] = 0;
4203
4204 /* create and initialize a linked list */
4205 nlnk = N+1;
4206 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4207
4208 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4209 len = ai[owners[rank+1]] - ai[owners[rank]];
4210 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4211
4212 current_space = free_space;
4213
4214 /* determine symbolic info for each local row */
4215 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4216
4217 for (k=0; k<merge->nrecv; k++) {
4218 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4219 nrows = *buf_ri_k[k];
4220 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
4221 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
4222 }
4223
4224 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4225 len = 0;
4226 for (i=0; i<m; i++) {
4227 bnzi = 0;
4228 /* add local non-zero cols of this proc's seqmat into lnk */
4229 arow = owners[rank] + i;
4230 anzi = ai[arow+1] - ai[arow];
4231 aj = a->j + ai[arow];
4232 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4233 bnzi += nlnk;
4234 /* add received col data into lnk */
4235 for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4236 if (i == *nextrow[k]) { /* i-th row */
4237 anzi = *(nextai[k]+1) - *nextai[k];
4238 aj = buf_rj[k] + *nextai[k];
4239 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4240 bnzi += nlnk;
4241 nextrow[k]++; nextai[k]++;
4242 }
4243 }
4244 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4245
4246 /* if free space is not available, make more free space */
4247 if (current_space->local_remaining<bnzi) {
4248 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4249 nspacedouble++;
4250 }
4251 /* copy data into free space, then initialize lnk */
4252 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4253 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4254
4255 current_space->array += bnzi;
4256 current_space->local_used += bnzi;
4257 current_space->local_remaining -= bnzi;
4258
4259 bi[i+1] = bi[i] + bnzi;
4260 }
4261
4262 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4263
4264 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4265 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4266 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4267
4268 /* create symbolic parallel matrix B_mpi */
4269 /*---------------------------------------*/
4270 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4271 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4272 if (n==PETSC_DECIDE) {
4273 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4274 } else {
4275 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4276 }
4277 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4278 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4279 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4280 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4281 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4282
4283 /* B_mpi is not ready
for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4284 B_mpi->assembled = PETSC_FALSE; 4285 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4286 merge->bi = bi; 4287 merge->bj = bj; 4288 merge->buf_ri = buf_ri; 4289 merge->buf_rj = buf_rj; 4290 merge->coi = NULL; 4291 merge->coj = NULL; 4292 merge->owners_co = NULL; 4293 4294 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4295 4296 /* attach the supporting struct to B_mpi for reuse */ 4297 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4298 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4299 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4300 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4301 *mpimat = B_mpi; 4302 4303 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4304 PetscFunctionReturn(0); 4305 } 4306 4307 #undef __FUNCT__ 4308 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4309 /*@C 4310 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4311 matrices from each processor 4312 4313 Collective on MPI_Comm 4314 4315 Input Parameters: 4316 + comm - the communicators the parallel matrix will live on 4317 . seqmat - the input sequential matrices 4318 . m - number of local rows (or PETSC_DECIDE) 4319 . n - number of local columns (or PETSC_DECIDE) 4320 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4321 4322 Output Parameter: 4323 . mpimat - the parallel matrix generated 4324 4325 Level: advanced 4326 4327 Notes: 4328 The dimensions of the sequential matrix in each processor MUST be the same. 4329 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4330 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4331 @*/ 4332 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4333 { 4334 PetscErrorCode ierr; 4335 PetscMPIInt size; 4336 4337 PetscFunctionBegin; 4338 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4339 if (size == 1) { 4340 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4341 if (scall == MAT_INITIAL_MATRIX) { 4342 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4343 } else { 4344 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4345 } 4346 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4347 PetscFunctionReturn(0); 4348 } 4349 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4350 if (scall == MAT_INITIAL_MATRIX) { 4351 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4352 } 4353 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4354 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4355 PetscFunctionReturn(0); 4356 } 4357 4358 #undef __FUNCT__ 4359 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4360 /*@ 4361 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4362 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4363 with MatGetSize() 4364 4365 Not Collective 4366 4367 Input Parameters: 4368 + A - the matrix 4369 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4370 4371 Output Parameter: 4372 . 
A_loc - the local sequential matrix generated 4373 4374 Level: developer 4375 4376 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4377 4378 @*/ 4379 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4380 { 4381 PetscErrorCode ierr; 4382 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4383 Mat_SeqAIJ *mat,*a,*b; 4384 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4385 MatScalar *aa,*ba,*cam; 4386 PetscScalar *ca; 4387 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4388 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4389 PetscBool match; 4390 MPI_Comm comm; 4391 PetscMPIInt size; 4392 4393 PetscFunctionBegin; 4394 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4395 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4396 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4397 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4398 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4399 4400 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4401 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4402 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4403 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4404 aa = a->a; ba = b->a; 4405 if (scall == MAT_INITIAL_MATRIX) { 4406 if (size == 1) { 4407 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4408 PetscFunctionReturn(0); 4409 } 4410 4411 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4412 ci[0] = 0; 4413 for (i=0; i<am; i++) { 4414 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4415 } 4416 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4417 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4418 k = 0; 4419 for (i=0; i<am; i++) { 4420 ncols_o = bi[i+1] - bi[i]; 4421 ncols_d = ai[i+1] - ai[i]; 4422 /* off-diagonal portion of A */ 4423 for (jo=0; jo<ncols_o; jo++) { 4424 col = cmap[*bj]; 4425 if (col >= cstart) break; 4426 cj[k] = col; bj++; 4427 ca[k++] = *ba++; 4428 } 4429 /* diagonal portion of A */ 4430 for (j=0; j<ncols_d; j++) { 4431 cj[k] = cstart + *aj++; 4432 ca[k++] = *aa++; 4433 } 4434 /* off-diagonal portion of A */ 4435 for (j=jo; j<ncols_o; j++) { 4436 cj[k] = cmap[*bj++]; 4437 ca[k++] = *ba++; 4438 } 4439 } 4440 /* put together the new matrix */ 4441 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4442 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4443 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4444 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4445 mat->free_a = PETSC_TRUE; 4446 mat->free_ij = PETSC_TRUE; 4447 mat->nonew = 0; 4448 } else if (scall == MAT_REUSE_MATRIX) { 4449 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4450 ci = mat->i; cj = mat->j; cam = mat->a; 4451 for (i=0; i<am; i++) { 4452 /* off-diagonal portion of A */ 4453 ncols_o = bi[i+1] - bi[i]; 4454 for (jo=0; jo<ncols_o; jo++) { 4455 col = cmap[*bj]; 4456 if (col >= cstart) break; 4457 *cam++ = *ba++; bj++; 4458 } 4459 /* diagonal portion of A */ 4460 ncols_d = ai[i+1] - ai[i]; 4461 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4462 /* off-diagonal portion of A */ 4463 for (j=jo; j<ncols_o; j++) { 4464 *cam++ = *ba++; bj++; 4465 } 4466 } 4467 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4468 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4469 PetscFunctionReturn(0); 4470 } 4471 4472 #undef __FUNCT__ 4473 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4474 /*@C 4475 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4476 4477 Not Collective 4478 4479 Input Parameters: 4480 + A - the matrix 4481 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4482 - row, col - index sets of rows and columns to extract (or NULL) 4483 4484 Output Parameter: 4485 . A_loc - the local sequential matrix generated 4486 4487 Level: developer 4488 4489 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4490 4491 @*/ 4492 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4493 { 4494 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4495 PetscErrorCode ierr; 4496 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4497 IS isrowa,iscola; 4498 Mat *aloc; 4499 PetscBool match; 4500 4501 PetscFunctionBegin; 4502 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4503 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4504 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4505 if (!row) { 4506 start = A->rmap->rstart; end = A->rmap->rend; 4507 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4508 } else { 4509 isrowa = *row; 4510 } 4511 if (!col) { 4512 start = A->cmap->rstart; 4513 cmap = a->garray; 4514 nzA = a->A->cmap->n; 4515 nzB = a->B->cmap->n; 4516 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4517 ncols = 0; 4518 for (i=0; i<nzB; i++) { 4519 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4520 else break; 4521 } 4522 imark = i; 4523 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4524 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4525 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4526 } else { 4527 iscola = *col; 4528 } 4529 if (scall != MAT_INITIAL_MATRIX) { 4530 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4531 aloc[0] = *A_loc; 4532 } 4533 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4534 *A_loc = aloc[0]; 4535 ierr = PetscFree(aloc);CHKERRQ(ierr); 4536 if (!row) { 4537 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4538 } 4539 if (!col) { 4540 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4541 } 4542 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4543 PetscFunctionReturn(0); 4544 } 4545 4546 #undef __FUNCT__ 4547 #define __FUNCT__ "MatGetBrowsOfAcols" 4548 /*@C 4549 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to 
nonzero columns of local A 4550 4551 Collective on Mat 4552 4553 Input Parameters: 4554 + A,B - the matrices in mpiaij format 4555 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4556 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4557 4558 Output Parameter: 4559 + rowb, colb - index sets of rows and columns of B to extract 4560 - B_seq - the sequential matrix generated 4561 4562 Level: developer 4563 4564 @*/ 4565 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4566 { 4567 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4568 PetscErrorCode ierr; 4569 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4570 IS isrowb,iscolb; 4571 Mat *bseq=NULL; 4572 4573 PetscFunctionBegin; 4574 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4575 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4576 } 4577 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4578 4579 if (scall == MAT_INITIAL_MATRIX) { 4580 start = A->cmap->rstart; 4581 cmap = a->garray; 4582 nzA = a->A->cmap->n; 4583 nzB = a->B->cmap->n; 4584 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4585 ncols = 0; 4586 for (i=0; i<nzB; i++) { /* row < local row index */ 4587 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4588 else break; 4589 } 4590 imark = i; 4591 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4592 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4593 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4594 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4595 } else { 4596 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4597 isrowb = *rowb; iscolb = *colb; 4598 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 4599 bseq[0] = *B_seq; 4600 } 4601 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4602 *B_seq = bseq[0]; 4603 ierr = PetscFree(bseq);CHKERRQ(ierr); 4604 if (!rowb) { 4605 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4606 } else { 4607 *rowb = isrowb; 4608 } 4609 if (!colb) { 4610 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4611 } else { 4612 *colb = iscolb; 4613 } 4614 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4615 PetscFunctionReturn(0); 4616 } 4617 4618 #undef __FUNCT__ 4619 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4620 /* 4621 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4622 of the OFF-DIAGONAL portion of local A 4623 4624 Collective on Mat 4625 4626 Input Parameters: 4627 + A,B - the matrices in mpiaij format 4628 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4629 4630 Output Parameter: 4631 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4632 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4633 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4634 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4635 4636 Level: developer 4637 4638 */ 4639 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4640 { 4641 VecScatter_MPI_General *gen_to,*gen_from; 4642 PetscErrorCode ierr; 4643 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4644 Mat_SeqAIJ *b_oth; 4645 VecScatter ctx =a->Mvctx; 4646 MPI_Comm comm; 4647 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4648 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4649 PetscScalar *rvalues,*svalues; 4650 MatScalar *b_otha,*bufa,*bufA; 4651 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4652 MPI_Request *rwaits = NULL,*swaits = NULL; 4653 MPI_Status *sstatus,rstatus; 4654 PetscMPIInt jj,size; 4655 PetscInt *cols,sbs,rbs; 4656 PetscScalar *vals; 4657 4658 PetscFunctionBegin; 4659 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4660 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4661 4662 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4663 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4664 } 4665 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4666 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4667 4668 gen_to = (VecScatter_MPI_General*)ctx->todata; 4669 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4670 rvalues = gen_from->values; /* holds the length of receiving row */ 4671 svalues = gen_to->values; /* holds the length of sending row */ 4672 nrecvs = gen_from->n; 4673 nsends = gen_to->n; 4674 4675 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4676 srow = gen_to->indices; /* local row index to be sent */ 4677 sstarts = gen_to->starts; 4678 sprocs = gen_to->procs; 4679 sstatus = gen_to->sstatus; 4680 sbs = gen_to->bs; 4681 rstarts = gen_from->starts; 4682 rprocs = gen_from->procs; 4683 rbs = gen_from->bs; 4684 4685 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4686 if (scall == MAT_INITIAL_MATRIX) { 4687 /* i-array */ 4688 /*---------*/ 4689 /* post receives */ 4690 for (i=0; i<nrecvs; i++) { 4691 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4692 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4693 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4694 } 4695 4696 /* pack the outgoing message */ 4697 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4698 4699 sstartsj[0] = 0; 4700 rstartsj[0] = 0; 4701 len = 0; /* total length of j or a array to be sent */ 4702 k = 0; 4703 for (i=0; i<nsends; i++) { 4704 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4705 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4706 for (j=0; j<nrows; j++) { 4707 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4708 for (l=0; l<sbs; l++) { 4709 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4710 4711 rowlen[j*sbs+l] = ncols; 4712 4713 len += ncols; 4714 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4715 } 4716 k++; 4717 } 4718 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4719 4720 sstartsj[i+1] = len; /* starting point of 
(i+1)-th outgoing msg in bufj and bufa */ 4721 } 4722 /* recvs and sends of i-array are completed */ 4723 i = nrecvs; 4724 while (i--) { 4725 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4726 } 4727 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4728 4729 /* allocate buffers for sending j and a arrays */ 4730 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 4731 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 4732 4733 /* create i-array of B_oth */ 4734 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 4735 4736 b_othi[0] = 0; 4737 len = 0; /* total length of j or a array to be received */ 4738 k = 0; 4739 for (i=0; i<nrecvs; i++) { 4740 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4741 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4742 for (j=0; j<nrows; j++) { 4743 b_othi[k+1] = b_othi[k] + rowlen[j]; 4744 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 4745 k++; 4746 } 4747 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4748 } 4749 4750 /* allocate space for j and a arrrays of B_oth */ 4751 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 4752 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 4753 4754 /* j-array */ 4755 /*---------*/ 4756 /* post receives of j-array */ 4757 for (i=0; i<nrecvs; i++) { 4758 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4759 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4760 } 4761 4762 /* pack the outgoing message j-array */ 4763 k = 0; 4764 for (i=0; i<nsends; i++) { 4765 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4766 bufJ = bufj+sstartsj[i]; 4767 for (j=0; j<nrows; j++) { 4768 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4769 for (ll=0; ll<sbs; ll++) { 4770 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4771 for (l=0; l<ncols; l++) { 4772 *bufJ++ = cols[l]; 4773 } 4774 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4775 } 4776 } 4777 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4778 } 4779 4780 /* recvs and sends of j-array are completed */ 4781 i = nrecvs; 4782 while (i--) { 4783 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4784 } 4785 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4786 } else if (scall == MAT_REUSE_MATRIX) { 4787 sstartsj = *startsj_s; 4788 rstartsj = *startsj_r; 4789 bufa = *bufa_ptr; 4790 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4791 b_otha = b_oth->a; 4792 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4793 4794 /* a-array */ 4795 /*---------*/ 4796 /* post receives of a-array */ 4797 for (i=0; i<nrecvs; i++) { 4798 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4799 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4800 } 4801 4802 /* pack the outgoing message a-array */ 4803 k = 0; 4804 for (i=0; i<nsends; i++) { 4805 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4806 bufA = bufa+sstartsj[i]; 4807 for (j=0; j<nrows; j++) { 4808 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4809 for (ll=0; ll<sbs; ll++) { 4810 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4811 for (l=0; l<ncols; l++) { 4812 *bufA++ = vals[l]; 4813 } 4814 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4815 } 4816 } 4817 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4818 } 4819 /* recvs and sends of a-array are completed */ 4820 i = nrecvs; 4821 while (i--) { 4822 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4823 } 4824 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4825 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4826 4827 if (scall == MAT_INITIAL_MATRIX) { 4828 /* put together the new matrix */ 4829 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4830 4831 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4832 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4833 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4834 b_oth->free_a = PETSC_TRUE; 4835 b_oth->free_ij = PETSC_TRUE; 4836 b_oth->nonew = 0; 4837 4838 ierr = PetscFree(bufj);CHKERRQ(ierr); 4839 if (!startsj_s || !bufa_ptr) { 4840 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4841 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4842 } else { 4843 *startsj_s = sstartsj; 4844 *startsj_r = rstartsj; 4845 *bufa_ptr = bufa; 4846 } 4847 } 4848 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4849 PetscFunctionReturn(0); 4850 } 4851 4852 #undef __FUNCT__ 4853 #define __FUNCT__ "MatGetCommunicationStructs" 4854 /*@C 4855 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4856 4857 Not Collective 4858 4859 Input Parameters: 4860 . A - The matrix in mpiaij format 4861 4862 Output Parameter: 4863 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4864 . 
colmap - A map from global column index to local index into lvec 4865 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4866 4867 Level: developer 4868 4869 @*/ 4870 #if defined(PETSC_USE_CTABLE) 4871 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4872 #else 4873 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4874 #endif 4875 { 4876 Mat_MPIAIJ *a; 4877 4878 PetscFunctionBegin; 4879 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4880 PetscValidPointer(lvec, 2); 4881 PetscValidPointer(colmap, 3); 4882 PetscValidPointer(multScatter, 4); 4883 a = (Mat_MPIAIJ*) A->data; 4884 if (lvec) *lvec = a->lvec; 4885 if (colmap) *colmap = a->colmap; 4886 if (multScatter) *multScatter = a->Mvctx; 4887 PetscFunctionReturn(0); 4888 } 4889 4890 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4891 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4892 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4893 #if defined(PETSC_HAVE_ELEMENTAL) 4894 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4895 #endif 4896 #if defined(PETSC_HAVE_HYPRE) 4897 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 4898 #endif 4899 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*); 4900 4901 #undef __FUNCT__ 4902 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4903 /* 4904 Computes (B'*A')' since computing B*A directly is untenable 4905 4906 n p p 4907 ( ) ( ) ( ) 4908 m ( A ) * n ( B ) = m ( C ) 4909 ( ) ( ) ( ) 4910 4911 */ 4912 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4913 { 4914 PetscErrorCode ierr; 4915 Mat At,Bt,Ct; 4916 4917 PetscFunctionBegin; 4918 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4919 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4920 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4921 ierr = MatDestroy(&At);CHKERRQ(ierr); 4922 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4923 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4924 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4925 PetscFunctionReturn(0); 4926 } 4927 4928 #undef __FUNCT__ 4929 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4930 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4931 { 4932 PetscErrorCode ierr; 4933 PetscInt m=A->rmap->n,n=B->cmap->n; 4934 Mat Cmat; 4935 4936 PetscFunctionBegin; 4937 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4938 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 4939 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4940 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 4941 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4942 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 4943 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4944 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4945 4946 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 4947 4948 *C = Cmat; 4949 PetscFunctionReturn(0); 4950 } 4951 4952 /* ----------------------------------------------------------------*/ 4953 #undef __FUNCT__ 4954 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4955 PETSC_INTERN 
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4956 { 4957 PetscErrorCode ierr; 4958 4959 PetscFunctionBegin; 4960 if (scall == MAT_INITIAL_MATRIX) { 4961 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4962 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4963 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4964 } 4965 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4966 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4967 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4968 PetscFunctionReturn(0); 4969 } 4970 4971 /*MC 4972 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4973 4974 Options Database Keys: 4975 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4976 4977 Level: beginner 4978 4979 .seealso: MatCreateAIJ() 4980 M*/ 4981 4982 #undef __FUNCT__ 4983 #define __FUNCT__ "MatCreate_MPIAIJ" 4984 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 4985 { 4986 Mat_MPIAIJ *b; 4987 PetscErrorCode ierr; 4988 PetscMPIInt size; 4989 4990 PetscFunctionBegin; 4991 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 4992 4993 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 4994 B->data = (void*)b; 4995 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4996 B->assembled = PETSC_FALSE; 4997 B->insertmode = NOT_SET_VALUES; 4998 b->size = size; 4999 5000 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5001 5002 /* build cache for off array entries formed */ 5003 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5004 5005 b->donotstash = PETSC_FALSE; 5006 b->colmap = 0; 5007 b->garray = 0; 5008 b->roworiented = PETSC_TRUE; 5009 5010 /* stuff used for matrix vector multiply */ 5011 b->lvec = NULL; 5012 b->Mvctx = NULL; 5013 5014 /* stuff for MatGetRow() */ 5015 b->rowindices = 0; 5016 b->rowvalues = 0; 5017 b->getrowactive = PETSC_FALSE; 5018 5019 /* flexible pointer used in CUSP/CUSPARSE classes */ 5020 b->spptr = NULL; 5021 5022 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5023 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5024 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5025 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5026 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5027 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5028 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5029 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5030 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5031 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5032 #if defined(PETSC_HAVE_ELEMENTAL) 5033 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5034 #endif 5035 #if defined(PETSC_HAVE_HYPRE) 5036 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5037 #endif 5038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr); 5039 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5042 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5043 PetscFunctionReturn(0); 5044 } 5045 5046 #undef __FUNCT__ 5047 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5048 /*@C 5049 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5050 and "off-diagonal" part of the matrix in CSR format. 5051 5052 Collective on MPI_Comm 5053 5054 Input Parameters: 5055 + comm - MPI communicator 5056 . m - number of local rows (Cannot be PETSC_DECIDE) 5057 . n - This value should be the same as the local size used in creating the 5058 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5059 calculated if N is given) For square matrices n is almost always m. 5060 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5061 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5062 . i - row indices for "diagonal" portion of matrix 5063 . j - column indices 5064 . a - matrix values 5065 . oi - row indices for "off-diagonal" portion of matrix 5066 . oj - column indices 5067 - oa - matrix values 5068 5069 Output Parameter: 5070 . mat - the matrix 5071 5072 Level: advanced 5073 5074 Notes: 5075 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5076 must free the arrays once the matrix has been destroyed and not before. 5077 5078 The i and j indices are 0 based 5079 5080 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5081 5082 This sets local rows and cannot be used to set off-processor values. 5083 5084 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5085 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5086 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5087 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5088 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5089 communication if it is known that only local entries will be set. 
5090 5091 .keywords: matrix, aij, compressed row, sparse, parallel 5092 5093 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5094 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5095 @*/ 5096 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5097 { 5098 PetscErrorCode ierr; 5099 Mat_MPIAIJ *maij; 5100 5101 PetscFunctionBegin; 5102 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5103 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5104 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5105 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5106 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5107 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5108 maij = (Mat_MPIAIJ*) (*mat)->data; 5109 5110 (*mat)->preallocated = PETSC_TRUE; 5111 5112 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5113 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5114 5115 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5116 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5117 5118 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5119 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5120 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5121 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5122 5123 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5124 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5125 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5126 PetscFunctionReturn(0); 5127 } 5128 5129 /* 5130 Special version for direct calls from Fortran 5131 */ 5132 #include <petsc/private/fortranimpl.h> 5133 5134 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5135 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5136 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5137 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5138 #endif 5139 5140 /* Change these macros so can be used in void function */ 5141 #undef CHKERRQ 5142 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5143 #undef SETERRQ2 5144 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5145 #undef SETERRQ3 5146 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5147 #undef SETERRQ 5148 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5149 5150 #undef __FUNCT__ 5151 #define __FUNCT__ "matsetvaluesmpiaij_" 5152 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5153 { 5154 Mat mat = *mmat; 5155 PetscInt m = *mm, n = *mn; 5156 InsertMode addv = *maddv; 5157 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5158 PetscScalar value; 5159 PetscErrorCode ierr; 5160 5161 MatCheckPreallocated(mat,1); 5162 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5163 5164 #if defined(PETSC_USE_DEBUG) 5165 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5166 #endif 5167 { 5168 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5169 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5170 PetscBool roworiented = aij->roworiented; 5171 5172 /* Some Variables required in the macro */ 5173 Mat A = aij->A; 5174 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5175 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5176 MatScalar *aa = a->a; 5177 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5178 Mat B = aij->B; 5179 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5180 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5181 MatScalar *ba = b->a; 5182 5183 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5184 PetscInt nonew = a->nonew; 5185 MatScalar *ap1,*ap2; 5186 5187 PetscFunctionBegin; 5188 for (i=0; i<m; i++) { 5189 if (im[i] < 0) continue; 5190 #if defined(PETSC_USE_DEBUG) 5191 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5192 #endif 5193 if (im[i] >= rstart && im[i] < rend) { 5194 row = im[i] - rstart; 5195 lastcol1 = -1; 5196 rp1 = aj + ai[row]; 5197 ap1 = aa + ai[row]; 5198 rmax1 = aimax[row]; 5199 nrow1 = ailen[row]; 5200 low1 = 0; 5201 high1 = nrow1; 5202 lastcol2 = -1; 5203 rp2 = bj + bi[row]; 5204 ap2 = ba + bi[row]; 5205 rmax2 = bimax[row]; 5206 nrow2 = bilen[row]; 5207 low2 = 0; 5208 high2 = nrow2; 5209 5210 for (j=0; j<n; j++) { 5211 if (roworiented) value = v[i*n+j]; 5212 else value = v[i+j*m]; 5213 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5214 if (in[j] >= cstart && in[j] < cend) { 5215 col = in[j] - cstart; 5216 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5217 } else if (in[j] < 0) continue; 5218 #if defined(PETSC_USE_DEBUG) 5219 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5220 #endif 5221 else { 5222 if (mat->was_assembled) { 5223 if (!aij->colmap) { 5224 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5225 } 5226 #if defined(PETSC_USE_CTABLE) 5227 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5228 col--; 5229 #else 5230 col = aij->colmap[in[j]] - 1; 5231 #endif 5232 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5233 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5234 col = in[j]; 5235 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5236 B = aij->B; 5237 b = (Mat_SeqAIJ*)B->data; 5238 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5239 rp2 = bj + bi[row]; 5240 ap2 = ba + bi[row]; 5241 rmax2 = bimax[row]; 5242 nrow2 = bilen[row]; 5243 low2 = 0; 5244 high2 = nrow2; 5245 bm = aij->B->rmap->n; 5246 ba = b->a; 5247 } 5248 } else col = in[j]; 5249 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5250 } 5251 } 5252 } else if (!aij->donotstash) { 5253 if (roworiented) { 5254 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5255 } else { 5256 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5257 } 5258 } 5259 } 5260 } 5261 PetscFunctionReturnVoid(); 5262 } 5263 5264
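/*
   Illustrative usage sketch for MatCreateMPIAIJSumSeqAIJ(), added for exposition only; it is not part of
   the original library source and the sizes below are made up. Each process builds a sequential matrix of
   the same global dimensions that holds its contribution, the routine sums the overlapping entries into a
   single parallel MATMPIAIJ matrix, and MAT_REUSE_MATRIX repeats only the numeric phase when the values
   (but not the nonzero pattern) of seqmat have changed:

      Mat seqmat,mpimat;

      MatCreateSeqAIJ(PETSC_COMM_SELF,100,100,5,NULL,&seqmat);
      ... each process inserts and assembles its own contribution into seqmat ...
      MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);

      ... later, after changing values (same nonzero pattern) in seqmat ...
      MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
*/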