1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc/private/vecimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 8 /*MC 9 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 10 11 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 12 and MATMPIAIJ otherwise. As a result, for single process communicators, 13 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 14 for communicators controlling multiple processes. It is recommended that you call both of 15 the above preallocation routines for simplicity. 16 17 Options Database Keys: 18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 19 20 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 21 enough exist. 22 23 Level: beginner 24 25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 26 M*/ 27 28 /*MC 29 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 30 31 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 32 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 33 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 34 for communicators controlling multiple processes. It is recommended that you call both of 35 the above preallocation routines for simplicity. 36 37 Options Database Keys: 38 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 39 40 Level: beginner 41 42 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 43 M*/ 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatSetBlockSizes_MPIAIJ" 47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 48 { 49 PetscErrorCode ierr; 50 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 51 52 PetscFunctionBegin; 53 if (mat->A) { 54 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 55 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 56 } 57 PetscFunctionReturn(0); 58 } 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 62 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 63 { 64 PetscErrorCode ierr; 65 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 66 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 67 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 68 const PetscInt *ia,*ib; 69 const MatScalar *aa,*bb; 70 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 71 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 72 73 PetscFunctionBegin; 74 *keptrows = 0; 75 ia = a->i; 76 ib = b->i; 77 for (i=0; i<m; i++) { 78 na = ia[i+1] - ia[i]; 79 nb = ib[i+1] - ib[i]; 80 if (!na && !nb) { 81 cnt++; 82 goto ok1; 83 } 84 aa = a->a + ia[i]; 85 for (j=0; j<na; j++) { 86 if (aa[j] != 0.0) goto ok1; 87 } 88 bb = b->a + ib[i]; 89 for (j=0; j <nb; j++) { 90 if (bb[j] != 0.0) goto ok1; 91 } 92 cnt++; 93 ok1:; 94 } 95 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 96 if (!n0rows) PetscFunctionReturn(0); 97 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 98 cnt = 0; 99 for (i=0; i<m; i++) { 100 na = ia[i+1] - ia[i]; 101 nb = ib[i+1] - ib[i]; 102 if (!na && !nb) continue; 103 aa = a->a + ia[i]; 104 for (j=0; j<na;j++) { 105 if (aa[j] != 0.0) { 106 rows[cnt++] = rstart + i; 107 goto ok2; 108 } 109 } 110 bb = b->a + ib[i]; 111 for (j=0; j<nb; j++) { 112 if (bb[j] != 0.0) { 113 
rows[cnt++] = rstart + i; 114 goto ok2; 115 } 116 } 117 ok2:; 118 } 119 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatDiagonalSet_MPIAIJ" 125 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 126 { 127 PetscErrorCode ierr; 128 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 129 130 PetscFunctionBegin; 131 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 132 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 133 } else { 134 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 135 } 136 PetscFunctionReturn(0); 137 } 138 139 140 #undef __FUNCT__ 141 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 142 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 143 { 144 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 145 PetscErrorCode ierr; 146 PetscInt i,rstart,nrows,*rows; 147 148 PetscFunctionBegin; 149 *zrows = NULL; 150 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 151 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 152 for (i=0; i<nrows; i++) rows[i] += rstart; 153 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 159 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 160 { 161 PetscErrorCode ierr; 162 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 163 PetscInt i,n,*garray = aij->garray; 164 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 165 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 166 PetscReal *work; 167 168 PetscFunctionBegin; 169 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 170 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 171 if (type == NORM_2) { 172 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 173 work[A->cmap->rstart + a_aij->j[i]] += 
PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 174 } 175 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 176 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 177 } 178 } else if (type == NORM_1) { 179 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 180 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 181 } 182 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 183 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 184 } 185 } else if (type == NORM_INFINITY) { 186 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 187 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 188 } 189 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 190 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 191 } 192 193 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 194 if (type == NORM_INFINITY) { 195 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 196 } else { 197 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 198 } 199 ierr = PetscFree(work);CHKERRQ(ierr); 200 if (type == NORM_2) { 201 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 202 } 203 PetscFunctionReturn(0); 204 } 205 206 #undef __FUNCT__ 207 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ" 208 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 209 { 210 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 211 IS sis,gis; 212 PetscErrorCode ierr; 213 const PetscInt *isis,*igis; 214 PetscInt n,*iis,nsis,ngis,rstart,i; 215 216 PetscFunctionBegin; 217 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 218 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 219 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 220 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 221 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 222 ierr = 
ISGetIndices(gis,&igis);CHKERRQ(ierr); 223 224 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 225 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 226 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 227 n = ngis + nsis; 228 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 229 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 230 for (i=0; i<n; i++) iis[i] += rstart; 231 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 232 233 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 234 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 235 ierr = ISDestroy(&sis);CHKERRQ(ierr); 236 ierr = ISDestroy(&gis);CHKERRQ(ierr); 237 PetscFunctionReturn(0); 238 } 239 240 #undef __FUNCT__ 241 #define __FUNCT__ "MatDistribute_MPIAIJ" 242 /* 243 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 244 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 245 246 Only for square matrices 247 248 Used by a preconditioner, hence PETSC_EXTERN 249 */ 250 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 251 { 252 PetscMPIInt rank,size; 253 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 254 PetscErrorCode ierr; 255 Mat mat; 256 Mat_SeqAIJ *gmata; 257 PetscMPIInt tag; 258 MPI_Status status; 259 PetscBool aij; 260 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 261 262 PetscFunctionBegin; 263 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 264 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 265 if (!rank) { 266 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 267 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 268 } 269 if (reuse == MAT_INITIAL_MATRIX) { 270 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 271 ierr = 
MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 272 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 273 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 274 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 275 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 276 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 277 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 278 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 279 280 rowners[0] = 0; 281 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 282 rstart = rowners[rank]; 283 rend = rowners[rank+1]; 284 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 285 if (!rank) { 286 gmata = (Mat_SeqAIJ*) gmat->data; 287 /* send row lengths to all processors */ 288 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 289 for (i=1; i<size; i++) { 290 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 291 } 292 /* determine number diagonal and off-diagonal counts */ 293 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 294 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 295 jj = 0; 296 for (i=0; i<m; i++) { 297 for (j=0; j<dlens[i]; j++) { 298 if (gmata->j[jj] < rstart) ld[i]++; 299 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 300 jj++; 301 } 302 } 303 /* send column indices to other processes */ 304 for (i=1; i<size; i++) { 305 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 306 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 307 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 308 } 309 310 /* send numerical values to other processes */ 311 for (i=1; i<size; i++) { 312 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 313 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 314 } 315 gmataa = gmata->a; 316 gmataj = gmata->j; 317 318 } else { 319 /* receive row lengths */ 320 
ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 321 /* receive column indices */ 322 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 323 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 324 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 325 /* determine number diagonal and off-diagonal counts */ 326 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 327 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 328 jj = 0; 329 for (i=0; i<m; i++) { 330 for (j=0; j<dlens[i]; j++) { 331 if (gmataj[jj] < rstart) ld[i]++; 332 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 333 jj++; 334 } 335 } 336 /* receive numerical values */ 337 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 338 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 339 } 340 /* set preallocation */ 341 for (i=0; i<m; i++) { 342 dlens[i] -= olens[i]; 343 } 344 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 345 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 346 347 for (i=0; i<m; i++) { 348 dlens[i] += olens[i]; 349 } 350 cnt = 0; 351 for (i=0; i<m; i++) { 352 row = rstart + i; 353 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 354 cnt += dlens[i]; 355 } 356 if (rank) { 357 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 358 } 359 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 360 ierr = PetscFree(rowners);CHKERRQ(ierr); 361 362 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 363 364 *inmat = mat; 365 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 366 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 367 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 368 mat = *inmat; 369 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 370 if (!rank) { 371 /* send numerical values to other processes */ 372 gmata = 
(Mat_SeqAIJ*) gmat->data; 373 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 374 gmataa = gmata->a; 375 for (i=1; i<size; i++) { 376 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 377 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 378 } 379 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 380 } else { 381 /* receive numerical values from process 0*/ 382 nz = Ad->nz + Ao->nz; 383 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 384 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 385 } 386 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 387 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 388 ad = Ad->a; 389 ao = Ao->a; 390 if (mat->rmap->n) { 391 i = 0; 392 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 393 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 394 } 395 for (i=1; i<mat->rmap->n; i++) { 396 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 397 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 398 } 399 i--; 400 if (mat->rmap->n) { 401 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 402 } 403 if (rank) { 404 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 405 } 406 } 407 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 408 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 409 PetscFunctionReturn(0); 410 } 411 412 /* 413 Local utility routine that creates a mapping from the global column 414 number to the local number in the off-diagonal part of the local 415 storage of the matrix. 
When PETSC_USE_CTABLE is used this is scalable at 416 a slightly higher hash table cost; without it it is not scalable (each processor 417 has an order N integer array but is fast to acess. 418 */ 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 421 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 422 { 423 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 424 PetscErrorCode ierr; 425 PetscInt n = aij->B->cmap->n,i; 426 427 PetscFunctionBegin; 428 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 429 #if defined(PETSC_USE_CTABLE) 430 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 431 for (i=0; i<n; i++) { 432 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 433 } 434 #else 435 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 436 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 437 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 438 #endif 439 PetscFunctionReturn(0); 440 } 441 442 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 443 { \ 444 if (col <= lastcol1) low1 = 0; \ 445 else high1 = nrow1; \ 446 lastcol1 = col;\ 447 while (high1-low1 > 5) { \ 448 t = (low1+high1)/2; \ 449 if (rp1[t] > col) high1 = t; \ 450 else low1 = t; \ 451 } \ 452 for (_i=low1; _i<high1; _i++) { \ 453 if (rp1[_i] > col) break; \ 454 if (rp1[_i] == col) { \ 455 if (addv == ADD_VALUES) ap1[_i] += value; \ 456 else ap1[_i] = value; \ 457 goto a_noinsert; \ 458 } \ 459 } \ 460 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 461 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 462 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 463 
MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 464 N = nrow1++ - 1; a->nz++; high1++; \ 465 /* shift up all the later entries in this row */ \ 466 for (ii=N; ii>=_i; ii--) { \ 467 rp1[ii+1] = rp1[ii]; \ 468 ap1[ii+1] = ap1[ii]; \ 469 } \ 470 rp1[_i] = col; \ 471 ap1[_i] = value; \ 472 A->nonzerostate++;\ 473 a_noinsert: ; \ 474 ailen[row] = nrow1; \ 475 } 476 477 478 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 479 { \ 480 if (col <= lastcol2) low2 = 0; \ 481 else high2 = nrow2; \ 482 lastcol2 = col; \ 483 while (high2-low2 > 5) { \ 484 t = (low2+high2)/2; \ 485 if (rp2[t] > col) high2 = t; \ 486 else low2 = t; \ 487 } \ 488 for (_i=low2; _i<high2; _i++) { \ 489 if (rp2[_i] > col) break; \ 490 if (rp2[_i] == col) { \ 491 if (addv == ADD_VALUES) ap2[_i] += value; \ 492 else ap2[_i] = value; \ 493 goto b_noinsert; \ 494 } \ 495 } \ 496 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 497 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 498 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 499 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 500 N = nrow2++ - 1; b->nz++; high2++; \ 501 /* shift up all the later entries in this row */ \ 502 for (ii=N; ii>=_i; ii--) { \ 503 rp2[ii+1] = rp2[ii]; \ 504 ap2[ii+1] = ap2[ii]; \ 505 } \ 506 rp2[_i] = col; \ 507 ap2[_i] = value; \ 508 B->nonzerostate++; \ 509 b_noinsert: ; \ 510 bilen[row] = nrow2; \ 511 } 512 513 #undef __FUNCT__ 514 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 515 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 516 { 517 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 518 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 519 PetscErrorCode ierr; 520 PetscInt l,*garray = mat->garray,diag; 521 522 
PetscFunctionBegin; 523 /* code only works for square matrices A */ 524 525 /* find size of row to the left of the diagonal part */ 526 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 527 row = row - diag; 528 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 529 if (garray[b->j[b->i[row]+l]] > diag) break; 530 } 531 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 532 533 /* diagonal part */ 534 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 535 536 /* right of diagonal part */ 537 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 538 PetscFunctionReturn(0); 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatSetValues_MPIAIJ" 543 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 544 { 545 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 546 PetscScalar value; 547 PetscErrorCode ierr; 548 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 549 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 550 PetscBool roworiented = aij->roworiented; 551 552 /* Some Variables required in the macro */ 553 Mat A = aij->A; 554 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 555 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 556 MatScalar *aa = a->a; 557 PetscBool ignorezeroentries = a->ignorezeroentries; 558 Mat B = aij->B; 559 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 560 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 561 MatScalar *ba = b->a; 562 563 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 564 PetscInt nonew; 565 MatScalar *ap1,*ap2; 566 567 PetscFunctionBegin; 568 for (i=0; i<m; i++) { 569 if (im[i] < 0) continue; 570 #if defined(PETSC_USE_DEBUG) 571 if (im[i] >= mat->rmap->N) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 572 #endif 573 if (im[i] >= rstart && im[i] < rend) { 574 row = im[i] - rstart; 575 lastcol1 = -1; 576 rp1 = aj + ai[row]; 577 ap1 = aa + ai[row]; 578 rmax1 = aimax[row]; 579 nrow1 = ailen[row]; 580 low1 = 0; 581 high1 = nrow1; 582 lastcol2 = -1; 583 rp2 = bj + bi[row]; 584 ap2 = ba + bi[row]; 585 rmax2 = bimax[row]; 586 nrow2 = bilen[row]; 587 low2 = 0; 588 high2 = nrow2; 589 590 for (j=0; j<n; j++) { 591 if (roworiented) value = v[i*n+j]; 592 else value = v[i+j*m]; 593 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 594 if (in[j] >= cstart && in[j] < cend) { 595 col = in[j] - cstart; 596 nonew = a->nonew; 597 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 598 } else if (in[j] < 0) continue; 599 #if defined(PETSC_USE_DEBUG) 600 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 601 #endif 602 else { 603 if (mat->was_assembled) { 604 if (!aij->colmap) { 605 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 606 } 607 #if defined(PETSC_USE_CTABLE) 608 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 609 col--; 610 #else 611 col = aij->colmap[in[j]] - 1; 612 #endif 613 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 614 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 615 col = in[j]; 616 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 617 B = aij->B; 618 b = (Mat_SeqAIJ*)B->data; 619 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 620 rp2 = bj + bi[row]; 621 ap2 = ba + bi[row]; 622 rmax2 = bimax[row]; 623 nrow2 = bilen[row]; 624 low2 = 0; 625 high2 = nrow2; 626 bm = aij->B->rmap->n; 627 ba = b->a; 628 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 629 } 
else col = in[j]; 630 nonew = b->nonew; 631 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 632 } 633 } 634 } else { 635 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 636 if (!aij->donotstash) { 637 mat->assembled = PETSC_FALSE; 638 if (roworiented) { 639 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 640 } else { 641 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 642 } 643 } 644 } 645 } 646 PetscFunctionReturn(0); 647 } 648 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatGetValues_MPIAIJ" 651 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 652 { 653 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 654 PetscErrorCode ierr; 655 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 656 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 657 658 PetscFunctionBegin; 659 for (i=0; i<m; i++) { 660 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 661 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 662 if (idxm[i] >= rstart && idxm[i] < rend) { 663 row = idxm[i] - rstart; 664 for (j=0; j<n; j++) { 665 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 666 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 667 if (idxn[j] >= cstart && idxn[j] < cend) { 668 col = idxn[j] - cstart; 669 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 670 } else { 671 if 
(!aij->colmap) { 672 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 673 } 674 #if defined(PETSC_USE_CTABLE) 675 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 676 col--; 677 #else 678 col = aij->colmap[idxn[j]] - 1; 679 #endif 680 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 681 else { 682 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 683 } 684 } 685 } 686 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 687 } 688 PetscFunctionReturn(0); 689 } 690 691 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 692 693 #undef __FUNCT__ 694 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 695 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 696 { 697 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 698 PetscErrorCode ierr; 699 PetscInt nstash,reallocs; 700 701 PetscFunctionBegin; 702 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 703 704 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 705 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 706 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNCT__ 711 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 712 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 713 { 714 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 715 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 716 PetscErrorCode ierr; 717 PetscMPIInt n; 718 PetscInt i,j,rstart,ncols,flg; 719 PetscInt *row,*col; 720 PetscBool other_disassembled; 721 PetscScalar *val; 722 723 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 724 725 PetscFunctionBegin; 726 if (!aij->donotstash && !mat->nooffprocentries) { 727 while (1) { 728 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 729 if (!flg) break; 730 
731 for (i=0; i<n; ) { 732 /* Now identify the consecutive vals belonging to the same row */ 733 for (j=i,rstart=row[j]; j<n; j++) { 734 if (row[j] != rstart) break; 735 } 736 if (j < n) ncols = j-i; 737 else ncols = n-i; 738 /* Now assemble all these values with a single function call */ 739 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 740 741 i = j; 742 } 743 } 744 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 745 } 746 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 747 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 748 749 /* determine if any processor has disassembled, if so we must 750 also disassemble ourselfs, in order that we may reassemble. */ 751 /* 752 if nonzero structure of submatrix B cannot change then we know that 753 no processor disassembled thus we can skip this stuff 754 */ 755 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 756 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 757 if (mat->was_assembled && !other_disassembled) { 758 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 759 } 760 } 761 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 762 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 763 } 764 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 765 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 766 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 767 768 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 769 770 aij->rowvalues = 0; 771 772 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 773 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 774 775 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 776 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 777 PetscObjectState state = aij->A->nonzerostate + 
aij->B->nonzerostate; 778 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 779 } 780 PetscFunctionReturn(0); 781 } 782 783 #undef __FUNCT__ 784 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 785 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 786 { 787 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 788 PetscErrorCode ierr; 789 790 PetscFunctionBegin; 791 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 792 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatZeroRows_MPIAIJ" 798 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 799 { 800 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 801 PetscInt *lrows; 802 PetscInt r, len; 803 PetscErrorCode ierr; 804 805 PetscFunctionBegin; 806 /* get locally owned rows */ 807 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 808 /* fix right hand side if needed */ 809 if (x && b) { 810 const PetscScalar *xx; 811 PetscScalar *bb; 812 813 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 814 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 815 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 816 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 817 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 818 } 819 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 820 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 821 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 822 PetscBool cong; 823 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 824 if (cong) A->congruentlayouts = 1; 825 else A->congruentlayouts = 0; 826 } 827 if ((diag != 0.0) && A->congruentlayouts) { 828 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 829 } else if (diag != 0.0) { 830 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 831 if 
(((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 832 for (r = 0; r < len; ++r) { 833 const PetscInt row = lrows[r] + A->rmap->rstart; 834 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 835 } 836 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 837 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 838 } else { 839 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 840 } 841 ierr = PetscFree(lrows);CHKERRQ(ierr); 842 843 /* only change matrix nonzero state if pattern was allowed to be changed */ 844 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 845 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 846 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 847 } 848 PetscFunctionReturn(0); 849 } 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 853 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 854 { 855 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 856 PetscErrorCode ierr; 857 PetscMPIInt n = A->rmap->n; 858 PetscInt i,j,r,m,p = 0,len = 0; 859 PetscInt *lrows,*owners = A->rmap->range; 860 PetscSFNode *rrows; 861 PetscSF sf; 862 const PetscScalar *xx; 863 PetscScalar *bb,*mask; 864 Vec xmask,lmask; 865 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 866 const PetscInt *aj, *ii,*ridx; 867 PetscScalar *aa; 868 869 PetscFunctionBegin; 870 /* Create SF where leaves are input rows and roots are owned rows */ 871 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 872 for (r = 0; r < n; ++r) lrows[r] = -1; 873 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 874 for (r = 0; r < N; ++r) { 875 const PetscInt idx = rows[r]; 876 if (idx < 0 || A->rmap->N <= 
idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 877 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 878 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 879 } 880 rrows[r].rank = p; 881 rrows[r].index = rows[r] - owners[p]; 882 } 883 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 884 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 885 /* Collect flags for rows to be zeroed */ 886 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 887 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 888 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 889 /* Compress and put in row numbers */ 890 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 891 /* zero diagonal part of matrix */ 892 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 893 /* handle off diagonal part of matrix */ 894 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 895 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 896 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 897 for (i=0; i<len; i++) bb[lrows[i]] = 1; 898 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 899 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 900 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 901 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 902 if (x) { 903 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 904 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 906 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 907 } 908 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 909 /* remove zeroed rows of off diagonal matrix */ 910 ii = aij->i; 911 for (i=0; i<len; 
i++) { 912 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 913 } 914 /* loop over all elements of off process part of matrix zeroing removed columns*/ 915 if (aij->compressedrow.use) { 916 m = aij->compressedrow.nrows; 917 ii = aij->compressedrow.i; 918 ridx = aij->compressedrow.rindex; 919 for (i=0; i<m; i++) { 920 n = ii[i+1] - ii[i]; 921 aj = aij->j + ii[i]; 922 aa = aij->a + ii[i]; 923 924 for (j=0; j<n; j++) { 925 if (PetscAbsScalar(mask[*aj])) { 926 if (b) bb[*ridx] -= *aa*xx[*aj]; 927 *aa = 0.0; 928 } 929 aa++; 930 aj++; 931 } 932 ridx++; 933 } 934 } else { /* do not use compressed row format */ 935 m = l->B->rmap->n; 936 for (i=0; i<m; i++) { 937 n = ii[i+1] - ii[i]; 938 aj = aij->j + ii[i]; 939 aa = aij->a + ii[i]; 940 for (j=0; j<n; j++) { 941 if (PetscAbsScalar(mask[*aj])) { 942 if (b) bb[i] -= *aa*xx[*aj]; 943 *aa = 0.0; 944 } 945 aa++; 946 aj++; 947 } 948 } 949 } 950 if (x) { 951 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 952 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 953 } 954 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 955 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 956 ierr = PetscFree(lrows);CHKERRQ(ierr); 957 958 /* only change matrix nonzero state if pattern was allowed to be changed */ 959 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 960 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 961 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 962 } 963 PetscFunctionReturn(0); 964 } 965 966 #undef __FUNCT__ 967 #define __FUNCT__ "MatMult_MPIAIJ" 968 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 969 { 970 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 971 PetscErrorCode ierr; 972 PetscInt nt; 973 974 PetscFunctionBegin; 975 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 976 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx 
(%D)",A->cmap->n,nt); 977 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 978 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 979 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 980 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 986 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 987 { 988 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 989 PetscErrorCode ierr; 990 991 PetscFunctionBegin; 992 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatMultAdd_MPIAIJ" 998 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 999 { 1000 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1001 PetscErrorCode ierr; 1002 1003 PetscFunctionBegin; 1004 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 #undef __FUNCT__ 1012 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1013 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1014 { 1015 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1016 PetscErrorCode ierr; 1017 PetscBool merged; 1018 1019 PetscFunctionBegin; 1020 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1021 /* do nondiagonal part */ 1022 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1023 if (!merged) { 1024 /* send it on its way */ 1025 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1026 /* do local part */ 1027 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1028 /* receive remote parts: 
note this assumes the values are not actually */ 1029 /* added in yy until the next line, */ 1030 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1031 } else { 1032 /* do local part */ 1033 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1034 /* send it on its way */ 1035 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1036 /* values actually were received in the Begin() but we need to call this nop */ 1037 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1038 } 1039 PetscFunctionReturn(0); 1040 } 1041 1042 #undef __FUNCT__ 1043 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1044 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1045 { 1046 MPI_Comm comm; 1047 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1048 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1049 IS Me,Notme; 1050 PetscErrorCode ierr; 1051 PetscInt M,N,first,last,*notme,i; 1052 PetscMPIInt size; 1053 1054 PetscFunctionBegin; 1055 /* Easy test: symmetric diagonal block */ 1056 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1057 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1058 if (!*f) PetscFunctionReturn(0); 1059 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1060 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1061 if (size == 1) PetscFunctionReturn(0); 1062 1063 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
*/ 1064 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1065 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1066 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1067 for (i=0; i<first; i++) notme[i] = i; 1068 for (i=last; i<M; i++) notme[i-last+first] = i; 1069 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1070 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1071 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1072 Aoff = Aoffs[0]; 1073 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1074 Boff = Boffs[0]; 1075 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1076 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1077 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1078 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1079 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1080 ierr = PetscFree(notme);CHKERRQ(ierr); 1081 PetscFunctionReturn(0); 1082 } 1083 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1086 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1087 { 1088 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1089 PetscErrorCode ierr; 1090 1091 PetscFunctionBegin; 1092 /* do nondiagonal part */ 1093 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1094 /* send it on its way */ 1095 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1096 /* do local part */ 1097 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1098 /* receive remote parts */ 1099 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1100 PetscFunctionReturn(0); 1101 } 1102 1103 /* 1104 This only works correctly for square matrices where the subblock A->A is the 1105 diagonal block 1106 */ 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1109 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat 
A,Vec v) 1110 { 1111 PetscErrorCode ierr; 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 1114 PetscFunctionBegin; 1115 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1116 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1117 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatScale_MPIAIJ" 1123 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1124 { 1125 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1126 PetscErrorCode ierr; 1127 1128 PetscFunctionBegin; 1129 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1130 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatDestroy_MPIAIJ" 1136 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1137 { 1138 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1139 PetscErrorCode ierr; 1140 1141 PetscFunctionBegin; 1142 #if defined(PETSC_USE_LOG) 1143 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1144 #endif 1145 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1146 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1147 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1148 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1149 #if defined(PETSC_USE_CTABLE) 1150 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1151 #else 1152 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1153 #endif 1154 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1155 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1156 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1157 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1158 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1159 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1160 1161 ierr = 
PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1162 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1163 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1164 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1165 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1168 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1169 #if defined(PETSC_HAVE_ELEMENTAL) 1170 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1171 #endif 1172 PetscFunctionReturn(0); 1173 } 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1177 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1178 { 1179 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1180 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1181 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1182 PetscErrorCode ierr; 1183 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1184 int fd; 1185 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1186 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1187 PetscScalar *column_values; 1188 PetscInt message_count,flowcontrolcount; 1189 FILE *file; 1190 1191 PetscFunctionBegin; 1192 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1193 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1194 nz = A->nz + B->nz; 1195 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1196 if (!rank) { 1197 header[0] = MAT_FILE_CLASSID; 1198 header[1] = 
mat->rmap->N; 1199 header[2] = mat->cmap->N; 1200 1201 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1202 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1203 /* get largest number of rows any processor has */ 1204 rlen = mat->rmap->n; 1205 range = mat->rmap->range; 1206 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1207 } else { 1208 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1209 rlen = mat->rmap->n; 1210 } 1211 1212 /* load up the local row counts */ 1213 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1214 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1215 1216 /* store the row lengths to the file */ 1217 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1218 if (!rank) { 1219 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1220 for (i=1; i<size; i++) { 1221 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1222 rlen = range[i+1] - range[i]; 1223 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1224 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1225 } 1226 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1227 } else { 1228 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1229 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1230 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1231 } 1232 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1233 1234 /* load up the local column indices */ 1235 nzmax = nz; /* th processor needs space a largest processor needs */ 1236 ierr = 
MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1237 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1238 cnt = 0; 1239 for (i=0; i<mat->rmap->n; i++) { 1240 for (j=B->i[i]; j<B->i[i+1]; j++) { 1241 if ((col = garray[B->j[j]]) > cstart) break; 1242 column_indices[cnt++] = col; 1243 } 1244 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1245 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1246 } 1247 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1248 1249 /* store the column indices to the file */ 1250 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1251 if (!rank) { 1252 MPI_Status status; 1253 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1254 for (i=1; i<size; i++) { 1255 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1256 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1257 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1258 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1259 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1260 } 1261 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1262 } else { 1263 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1264 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1265 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1266 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1267 } 1268 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1269 1270 
/* load up the local column values */ 1271 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1272 cnt = 0; 1273 for (i=0; i<mat->rmap->n; i++) { 1274 for (j=B->i[i]; j<B->i[i+1]; j++) { 1275 if (garray[B->j[j]] > cstart) break; 1276 column_values[cnt++] = B->a[j]; 1277 } 1278 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1279 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1280 } 1281 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1282 1283 /* store the column values to the file */ 1284 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1285 if (!rank) { 1286 MPI_Status status; 1287 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1288 for (i=1; i<size; i++) { 1289 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1290 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1291 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1292 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1293 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1294 } 1295 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1296 } else { 1297 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1298 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1299 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1300 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1301 } 1302 ierr = PetscFree(column_values);CHKERRQ(ierr); 1303 1304 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1305 
if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 #include <petscdraw.h> 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1312 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1313 { 1314 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1315 PetscErrorCode ierr; 1316 PetscMPIInt rank = aij->rank,size = aij->size; 1317 PetscBool isdraw,iascii,isbinary; 1318 PetscViewer sviewer; 1319 PetscViewerFormat format; 1320 1321 PetscFunctionBegin; 1322 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1323 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1324 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1325 if (iascii) { 1326 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1327 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1328 MatInfo info; 1329 PetscBool inodes; 1330 1331 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1332 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1333 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1334 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1335 if (!inodes) { 1336 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1337 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1338 } else { 1339 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1340 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1341 } 1342 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1343 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] 
on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1344 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1345 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1346 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1347 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1348 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1349 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1350 PetscFunctionReturn(0); 1351 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1352 PetscInt inodecount,inodelimit,*inodes; 1353 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1354 if (inodes) { 1355 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1356 } else { 1357 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1358 } 1359 PetscFunctionReturn(0); 1360 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1361 PetscFunctionReturn(0); 1362 } 1363 } else if (isbinary) { 1364 if (size == 1) { 1365 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1366 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1367 } else { 1368 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1369 } 1370 PetscFunctionReturn(0); 1371 } else if (isdraw) { 1372 PetscDraw draw; 1373 PetscBool isnull; 1374 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1375 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1376 if (isnull) PetscFunctionReturn(0); 1377 } 1378 1379 { 1380 /* assemble the entire matrix onto first processor. 
*/ 1381 Mat A; 1382 Mat_SeqAIJ *Aloc; 1383 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1384 MatScalar *a; 1385 1386 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1387 if (!rank) { 1388 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1389 } else { 1390 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1391 } 1392 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1393 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1394 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1395 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1396 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1397 1398 /* copy over the A part */ 1399 Aloc = (Mat_SeqAIJ*)aij->A->data; 1400 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1401 row = mat->rmap->rstart; 1402 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1403 for (i=0; i<m; i++) { 1404 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1405 row++; 1406 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1407 } 1408 aj = Aloc->j; 1409 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1410 1411 /* copy over the B part */ 1412 Aloc = (Mat_SeqAIJ*)aij->B->data; 1413 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1414 row = mat->rmap->rstart; 1415 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1416 ct = cols; 1417 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1418 for (i=0; i<m; i++) { 1419 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1420 row++; 1421 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1422 } 1423 ierr = PetscFree(ct);CHKERRQ(ierr); 1424 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1425 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1426 /* 1427 Everyone has to call to draw the matrix since the graphics waits are 1428 synchronized across all processors that share the 
PetscDraw object 1429 */ 1430 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1431 if (!rank) { 1432 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1433 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1434 } 1435 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1436 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1437 ierr = MatDestroy(&A);CHKERRQ(ierr); 1438 } 1439 PetscFunctionReturn(0); 1440 } 1441 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatView_MPIAIJ" 1444 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1445 { 1446 PetscErrorCode ierr; 1447 PetscBool iascii,isdraw,issocket,isbinary; 1448 1449 PetscFunctionBegin; 1450 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1451 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1452 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1453 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1454 if (iascii || isdraw || isbinary || issocket) { 1455 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1456 } 1457 PetscFunctionReturn(0); 1458 } 1459 1460 #undef __FUNCT__ 1461 #define __FUNCT__ "MatSOR_MPIAIJ" 1462 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1463 { 1464 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1465 PetscErrorCode ierr; 1466 Vec bb1 = 0; 1467 PetscBool hasop; 1468 1469 PetscFunctionBegin; 1470 if (flag == SOR_APPLY_UPPER) { 1471 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1472 PetscFunctionReturn(0); 1473 } 1474 1475 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1476 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1477 
} 1478 1479 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1480 if (flag & SOR_ZERO_INITIAL_GUESS) { 1481 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1482 its--; 1483 } 1484 1485 while (its--) { 1486 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1487 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1488 1489 /* update rhs: bb1 = bb - B*x */ 1490 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1491 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1492 1493 /* local sweep */ 1494 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1495 } 1496 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1497 if (flag & SOR_ZERO_INITIAL_GUESS) { 1498 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1499 its--; 1500 } 1501 while (its--) { 1502 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1503 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1504 1505 /* update rhs: bb1 = bb - B*x */ 1506 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1507 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1508 1509 /* local sweep */ 1510 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1511 } 1512 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1513 if (flag & SOR_ZERO_INITIAL_GUESS) { 1514 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1515 its--; 1516 } 1517 while (its--) { 1518 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1519 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1520 1521 /* update rhs: bb1 = bb - B*x */ 1522 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1523 ierr = 
(*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1524 1525 /* local sweep */ 1526 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1527 } 1528 } else if (flag & SOR_EISENSTAT) { 1529 Vec xx1; 1530 1531 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1532 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1533 1534 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1535 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1536 if (!mat->diag) { 1537 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1538 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1539 } 1540 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1541 if (hasop) { 1542 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1543 } else { 1544 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1545 } 1546 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1547 1548 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1549 1550 /* local sweep */ 1551 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1552 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1553 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1554 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1555 1556 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1557 1558 matin->factorerrortype = mat->A->factorerrortype; 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatPermute_MPIAIJ" 1564 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1565 { 1566 Mat aA,aB,Aperm; 1567 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1568 PetscScalar *aa,*ba; 1569 PetscInt 
i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1570 PetscSF rowsf,sf; 1571 IS parcolp = NULL; 1572 PetscBool done; 1573 PetscErrorCode ierr; 1574 1575 PetscFunctionBegin; 1576 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1577 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1578 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1579 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1580 1581 /* Invert row permutation to find out where my rows should go */ 1582 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1583 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1584 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1585 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1586 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1587 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1588 1589 /* Invert column permutation to find out where my columns should go */ 1590 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1591 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1592 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1593 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1594 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1595 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1596 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1597 1598 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1599 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1600 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1601 1602 /* Find out where my gcols should go */ 1603 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1604 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1605 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1606 ierr = 
PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1607 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1608 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1609 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1610 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1611 1612 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1613 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1614 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1615 for (i=0; i<m; i++) { 1616 PetscInt row = rdest[i],rowner; 1617 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1618 for (j=ai[i]; j<ai[i+1]; j++) { 1619 PetscInt cowner,col = cdest[aj[j]]; 1620 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1621 if (rowner == cowner) dnnz[i]++; 1622 else onnz[i]++; 1623 } 1624 for (j=bi[i]; j<bi[i+1]; j++) { 1625 PetscInt cowner,col = gcdest[bj[j]]; 1626 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1627 if (rowner == cowner) dnnz[i]++; 1628 else onnz[i]++; 1629 } 1630 } 1631 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1632 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1633 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1634 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1635 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1636 1637 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1638 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1639 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1640 for (i=0; i<m; i++) { 1641 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1642 PetscInt j0,rowlen; 1643 rowlen = ai[i+1] - ai[i]; 1644 for (j0=j=0; j<rowlen; j0=j) { /* 
rowlen could be larger than number of rows m, so sum in batches */ 1645 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1646 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1647 } 1648 rowlen = bi[i+1] - bi[i]; 1649 for (j0=j=0; j<rowlen; j0=j) { 1650 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1651 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1652 } 1653 } 1654 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1655 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1656 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1657 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1658 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1659 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1660 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1661 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1662 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1663 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1664 *B = Aperm; 1665 PetscFunctionReturn(0); 1666 } 1667 1668 #undef __FUNCT__ 1669 #define __FUNCT__ "MatGetGhosts_MPIAIJ" 1670 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1671 { 1672 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1673 PetscErrorCode ierr; 1674 1675 PetscFunctionBegin; 1676 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1677 if (ghosts) *ghosts = aij->garray; 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1683 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1684 { 1685 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1686 Mat A = mat->A,B = mat->B; 1687 PetscErrorCode ierr; 1688 PetscReal isend[5],irecv[5]; 1689 1690 PetscFunctionBegin; 1691 info->block_size = 1.0; 1692 ierr = 
MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1693 1694 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1695 isend[3] = info->memory; isend[4] = info->mallocs; 1696 1697 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1698 1699 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1700 isend[3] += info->memory; isend[4] += info->mallocs; 1701 if (flag == MAT_LOCAL) { 1702 info->nz_used = isend[0]; 1703 info->nz_allocated = isend[1]; 1704 info->nz_unneeded = isend[2]; 1705 info->memory = isend[3]; 1706 info->mallocs = isend[4]; 1707 } else if (flag == MAT_GLOBAL_MAX) { 1708 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1709 1710 info->nz_used = irecv[0]; 1711 info->nz_allocated = irecv[1]; 1712 info->nz_unneeded = irecv[2]; 1713 info->memory = irecv[3]; 1714 info->mallocs = irecv[4]; 1715 } else if (flag == MAT_GLOBAL_SUM) { 1716 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1717 1718 info->nz_used = irecv[0]; 1719 info->nz_allocated = irecv[1]; 1720 info->nz_unneeded = irecv[2]; 1721 info->memory = irecv[3]; 1722 info->mallocs = irecv[4]; 1723 } 1724 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1725 info->fill_ratio_needed = 0; 1726 info->factor_mallocs = 0; 1727 PetscFunctionReturn(0); 1728 } 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatSetOption_MPIAIJ" 1732 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1733 { 1734 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1735 PetscErrorCode ierr; 1736 1737 PetscFunctionBegin; 1738 switch (op) { 1739 case MAT_NEW_NONZERO_LOCATIONS: 1740 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1741 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1742 case MAT_KEEP_NONZERO_PATTERN: 1743 case MAT_NEW_NONZERO_LOCATION_ERR: 1744 case MAT_USE_INODES: 1745 case MAT_IGNORE_ZERO_ENTRIES: 1746 MatCheckPreallocated(A,1); 1747 ierr 
= MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1748 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1749 break; 1750 case MAT_ROW_ORIENTED: 1751 MatCheckPreallocated(A,1); 1752 a->roworiented = flg; 1753 1754 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1755 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1756 break; 1757 case MAT_NEW_DIAGONALS: 1758 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1759 break; 1760 case MAT_IGNORE_OFF_PROC_ENTRIES: 1761 a->donotstash = flg; 1762 break; 1763 case MAT_SPD: 1764 A->spd_set = PETSC_TRUE; 1765 A->spd = flg; 1766 if (flg) { 1767 A->symmetric = PETSC_TRUE; 1768 A->structurally_symmetric = PETSC_TRUE; 1769 A->symmetric_set = PETSC_TRUE; 1770 A->structurally_symmetric_set = PETSC_TRUE; 1771 } 1772 break; 1773 case MAT_SYMMETRIC: 1774 MatCheckPreallocated(A,1); 1775 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1776 break; 1777 case MAT_STRUCTURALLY_SYMMETRIC: 1778 MatCheckPreallocated(A,1); 1779 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1780 break; 1781 case MAT_HERMITIAN: 1782 MatCheckPreallocated(A,1); 1783 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1784 break; 1785 case MAT_SYMMETRY_ETERNAL: 1786 MatCheckPreallocated(A,1); 1787 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1788 break; 1789 default: 1790 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1791 } 1792 PetscFunctionReturn(0); 1793 } 1794 1795 #undef __FUNCT__ 1796 #define __FUNCT__ "MatGetRow_MPIAIJ" 1797 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1798 { 1799 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1800 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1801 PetscErrorCode ierr; 1802 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1803 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1804 PetscInt *cmap,*idx_p; 1805 1806 PetscFunctionBegin; 1807 if (mat->getrowactive) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1808 mat->getrowactive = PETSC_TRUE; 1809 1810 if (!mat->rowvalues && (idx || v)) { 1811 /* 1812 allocate enough space to hold information from the longest row. 1813 */ 1814 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1815 PetscInt max = 1,tmp; 1816 for (i=0; i<matin->rmap->n; i++) { 1817 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1818 if (max < tmp) max = tmp; 1819 } 1820 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1821 } 1822 1823 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1824 lrow = row - rstart; 1825 1826 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1827 if (!v) {pvA = 0; pvB = 0;} 1828 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1829 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1830 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1831 nztot = nzA + nzB; 1832 1833 cmap = mat->garray; 1834 if (v || idx) { 1835 if (nztot) { 1836 /* Sort by increasing column numbers, assuming A and B already sorted */ 1837 PetscInt imark = -1; 1838 if (v) { 1839 *v = v_p = mat->rowvalues; 1840 for (i=0; i<nzB; i++) { 1841 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1842 else break; 1843 } 1844 imark = i; 1845 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1846 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1847 } 1848 if (idx) { 1849 *idx = idx_p = mat->rowindices; 1850 if (imark > -1) { 1851 for (i=0; i<imark; i++) { 1852 idx_p[i] = cmap[cworkB[i]]; 1853 } 1854 } else { 1855 for (i=0; i<nzB; i++) { 1856 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1857 else break; 1858 } 1859 imark = i; 1860 } 1861 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1862 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1863 } 1864 } else { 1865 if (idx) *idx = 0; 1866 if (v) *v = 0; 1867 } 1868 } 1869 
*nz = nztot; 1870 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1871 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1872 PetscFunctionReturn(0); 1873 } 1874 1875 #undef __FUNCT__ 1876 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1877 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1878 { 1879 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1880 1881 PetscFunctionBegin; 1882 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1883 aij->getrowactive = PETSC_FALSE; 1884 PetscFunctionReturn(0); 1885 } 1886 1887 #undef __FUNCT__ 1888 #define __FUNCT__ "MatNorm_MPIAIJ" 1889 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1890 { 1891 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1892 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1893 PetscErrorCode ierr; 1894 PetscInt i,j,cstart = mat->cmap->rstart; 1895 PetscReal sum = 0.0; 1896 MatScalar *v; 1897 1898 PetscFunctionBegin; 1899 if (aij->size == 1) { 1900 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1901 } else { 1902 if (type == NORM_FROBENIUS) { 1903 v = amat->a; 1904 for (i=0; i<amat->nz; i++) { 1905 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1906 } 1907 v = bmat->a; 1908 for (i=0; i<bmat->nz; i++) { 1909 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1910 } 1911 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1912 *norm = PetscSqrtReal(*norm); 1913 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1914 } else if (type == NORM_1) { /* max column norm */ 1915 PetscReal *tmp,*tmp2; 1916 PetscInt *jj,*garray = aij->garray; 1917 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1918 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1919 *norm = 0.0; 1920 v = amat->a; jj = amat->j; 1921 for (j=0; j<amat->nz; j++) { 1922 tmp[cstart + 
*jj++] += PetscAbsScalar(*v); v++; 1923 } 1924 v = bmat->a; jj = bmat->j; 1925 for (j=0; j<bmat->nz; j++) { 1926 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1927 } 1928 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1929 for (j=0; j<mat->cmap->N; j++) { 1930 if (tmp2[j] > *norm) *norm = tmp2[j]; 1931 } 1932 ierr = PetscFree(tmp);CHKERRQ(ierr); 1933 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1934 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1935 } else if (type == NORM_INFINITY) { /* max row norm */ 1936 PetscReal ntemp = 0.0; 1937 for (j=0; j<aij->A->rmap->n; j++) { 1938 v = amat->a + amat->i[j]; 1939 sum = 0.0; 1940 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1941 sum += PetscAbsScalar(*v); v++; 1942 } 1943 v = bmat->a + bmat->i[j]; 1944 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1945 sum += PetscAbsScalar(*v); v++; 1946 } 1947 if (sum > ntemp) ntemp = sum; 1948 } 1949 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1950 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1951 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1952 } 1953 PetscFunctionReturn(0); 1954 } 1955 1956 #undef __FUNCT__ 1957 #define __FUNCT__ "MatTranspose_MPIAIJ" 1958 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1959 { 1960 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1961 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1962 PetscErrorCode ierr; 1963 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1964 PetscInt cstart = A->cmap->rstart,ncol; 1965 Mat B; 1966 MatScalar *array; 1967 1968 PetscFunctionBegin; 1969 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1970 1971 ma = A->rmap->n; na = A->cmap->n; mb = 
a->B->rmap->n; nb = a->B->cmap->n; 1972 ai = Aloc->i; aj = Aloc->j; 1973 bi = Bloc->i; bj = Bloc->j; 1974 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1975 PetscInt *d_nnz,*g_nnz,*o_nnz; 1976 PetscSFNode *oloc; 1977 PETSC_UNUSED PetscSF sf; 1978 1979 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1980 /* compute d_nnz for preallocation */ 1981 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1982 for (i=0; i<ai[ma]; i++) { 1983 d_nnz[aj[i]]++; 1984 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1985 } 1986 /* compute local off-diagonal contributions */ 1987 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1988 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1989 /* map those to global */ 1990 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1991 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1992 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1993 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1994 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1995 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1996 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1997 1998 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1999 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2000 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2001 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2002 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2003 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2004 } else { 2005 B = *matout; 2006 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2007 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2008 } 2009 2010 /* copy over the A part */ 2011 array = Aloc->a; 2012 
row = A->rmap->rstart; 2013 for (i=0; i<ma; i++) { 2014 ncol = ai[i+1]-ai[i]; 2015 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2016 row++; 2017 array += ncol; aj += ncol; 2018 } 2019 aj = Aloc->j; 2020 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2021 2022 /* copy over the B part */ 2023 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2024 array = Bloc->a; 2025 row = A->rmap->rstart; 2026 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2027 cols_tmp = cols; 2028 for (i=0; i<mb; i++) { 2029 ncol = bi[i+1]-bi[i]; 2030 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2031 row++; 2032 array += ncol; cols_tmp += ncol; 2033 } 2034 ierr = PetscFree(cols);CHKERRQ(ierr); 2035 2036 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2037 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2038 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2039 *matout = B; 2040 } else { 2041 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2042 } 2043 PetscFunctionReturn(0); 2044 } 2045 2046 #undef __FUNCT__ 2047 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2048 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2049 { 2050 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2051 Mat a = aij->A,b = aij->B; 2052 PetscErrorCode ierr; 2053 PetscInt s1,s2,s3; 2054 2055 PetscFunctionBegin; 2056 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2057 if (rr) { 2058 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2059 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2060 /* Overlap communication with computation. 
*/ 2061 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2062 } 2063 if (ll) { 2064 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2065 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2066 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2067 } 2068 /* scale the diagonal block */ 2069 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2070 2071 if (rr) { 2072 /* Do a scatter end and then right scale the off-diagonal block */ 2073 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2074 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2075 } 2076 PetscFunctionReturn(0); 2077 } 2078 2079 #undef __FUNCT__ 2080 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2081 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2082 { 2083 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2084 PetscErrorCode ierr; 2085 2086 PetscFunctionBegin; 2087 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2088 PetscFunctionReturn(0); 2089 } 2090 2091 #undef __FUNCT__ 2092 #define __FUNCT__ "MatEqual_MPIAIJ" 2093 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2094 { 2095 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2096 Mat a,b,c,d; 2097 PetscBool flg; 2098 PetscErrorCode ierr; 2099 2100 PetscFunctionBegin; 2101 a = matA->A; b = matA->B; 2102 c = matB->A; d = matB->B; 2103 2104 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2105 if (flg) { 2106 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2107 } 2108 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2109 PetscFunctionReturn(0); 2110 } 2111 2112 #undef __FUNCT__ 2113 #define __FUNCT__ "MatCopy_MPIAIJ" 2114 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2115 { 2116 PetscErrorCode ierr; 2117 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2118 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2119 2120 PetscFunctionBegin; 2121 /* If the two matrices 
don't have the same copy implementation, they aren't compatible for fast copy. */ 2122 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2123 /* because of the column compression in the off-processor part of the matrix a->B, 2124 the number of columns in a->B and b->B may be different, hence we cannot call 2125 the MatCopy() directly on the two parts. If need be, we can provide a more 2126 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2127 then copying the submatrices */ 2128 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2129 } else { 2130 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2131 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2132 } 2133 PetscFunctionReturn(0); 2134 } 2135 2136 #undef __FUNCT__ 2137 #define __FUNCT__ "MatSetUp_MPIAIJ" 2138 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2139 { 2140 PetscErrorCode ierr; 2141 2142 PetscFunctionBegin; 2143 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 /* 2148 Computes the number of nonzeros per row needed for preallocation when X and Y 2149 have different nonzero structure. 
2150 */ 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2153 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2154 { 2155 PetscInt i,j,k,nzx,nzy; 2156 2157 PetscFunctionBegin; 2158 /* Set the number of nonzeros in the new matrix */ 2159 for (i=0; i<m; i++) { 2160 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2161 nzx = xi[i+1] - xi[i]; 2162 nzy = yi[i+1] - yi[i]; 2163 nnz[i] = 0; 2164 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2165 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2166 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2167 nnz[i]++; 2168 } 2169 for (; k<nzy; k++) nnz[i]++; 2170 } 2171 PetscFunctionReturn(0); 2172 } 2173 2174 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2175 #undef __FUNCT__ 2176 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2177 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2178 { 2179 PetscErrorCode ierr; 2180 PetscInt m = Y->rmap->N; 2181 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2182 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2183 2184 PetscFunctionBegin; 2185 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2186 PetscFunctionReturn(0); 2187 } 2188 2189 #undef __FUNCT__ 2190 #define __FUNCT__ "MatAXPY_MPIAIJ" 2191 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2192 { 2193 PetscErrorCode ierr; 2194 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2195 PetscBLASInt bnz,one=1; 2196 Mat_SeqAIJ *x,*y; 2197 2198 PetscFunctionBegin; 2199 if (str == SAME_NONZERO_PATTERN) { 2200 PetscScalar alpha = a; 2201 x = (Mat_SeqAIJ*)xx->A->data; 2202 ierr = 
PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2203 y = (Mat_SeqAIJ*)yy->A->data; 2204 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2205 x = (Mat_SeqAIJ*)xx->B->data; 2206 y = (Mat_SeqAIJ*)yy->B->data; 2207 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2208 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2209 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2210 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2211 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2212 } else { 2213 Mat B; 2214 PetscInt *nnz_d,*nnz_o; 2215 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2216 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2217 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2218 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2219 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2220 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2221 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2222 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2223 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2224 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2225 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2226 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2227 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2228 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2229 } 2230 PetscFunctionReturn(0); 2231 } 2232 2233 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2234 2235 #undef __FUNCT__ 2236 #define __FUNCT__ "MatConjugate_MPIAIJ" 2237 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2238 { 2239 #if defined(PETSC_USE_COMPLEX) 2240 PetscErrorCode ierr; 2241 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2242 2243 PetscFunctionBegin; 2244 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2245 ierr = 
MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2246 #else 2247 PetscFunctionBegin; 2248 #endif 2249 PetscFunctionReturn(0); 2250 } 2251 2252 #undef __FUNCT__ 2253 #define __FUNCT__ "MatRealPart_MPIAIJ" 2254 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2255 { 2256 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2257 PetscErrorCode ierr; 2258 2259 PetscFunctionBegin; 2260 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2261 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2262 PetscFunctionReturn(0); 2263 } 2264 2265 #undef __FUNCT__ 2266 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2267 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2268 { 2269 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2270 PetscErrorCode ierr; 2271 2272 PetscFunctionBegin; 2273 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2274 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2275 PetscFunctionReturn(0); 2276 } 2277 2278 #undef __FUNCT__ 2279 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2280 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2281 { 2282 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2283 PetscErrorCode ierr; 2284 PetscInt i,*idxb = 0; 2285 PetscScalar *va,*vb; 2286 Vec vtmp; 2287 2288 PetscFunctionBegin; 2289 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2290 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2291 if (idx) { 2292 for (i=0; i<A->rmap->n; i++) { 2293 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2294 } 2295 } 2296 2297 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2298 if (idx) { 2299 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2300 } 2301 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2302 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2303 2304 for (i=0; i<A->rmap->n; i++) { 2305 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2306 va[i] = vb[i]; 2307 if (idx) idx[i] = a->garray[idxb[i]]; 2308 } 2309 } 2310 2311 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2312 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2313 ierr = PetscFree(idxb);CHKERRQ(ierr); 2314 ierr = 
VecDestroy(&vtmp);CHKERRQ(ierr); 2315 PetscFunctionReturn(0); 2316 } 2317 2318 #undef __FUNCT__ 2319 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2320 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2321 { 2322 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2323 PetscErrorCode ierr; 2324 PetscInt i,*idxb = 0; 2325 PetscScalar *va,*vb; 2326 Vec vtmp; 2327 2328 PetscFunctionBegin; 2329 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2330 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2331 if (idx) { 2332 for (i=0; i<A->cmap->n; i++) { 2333 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2334 } 2335 } 2336 2337 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2338 if (idx) { 2339 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2340 } 2341 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2342 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2343 2344 for (i=0; i<A->rmap->n; i++) { 2345 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2346 va[i] = vb[i]; 2347 if (idx) idx[i] = a->garray[idxb[i]]; 2348 } 2349 } 2350 2351 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2352 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2353 ierr = PetscFree(idxb);CHKERRQ(ierr); 2354 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2355 PetscFunctionReturn(0); 2356 } 2357 2358 #undef __FUNCT__ 2359 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2360 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2361 { 2362 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2363 PetscInt n = A->rmap->n; 2364 PetscInt cstart = A->cmap->rstart; 2365 PetscInt *cmap = mat->garray; 2366 PetscInt *diagIdx, *offdiagIdx; 2367 Vec diagV, offdiagV; 2368 PetscScalar *a, *diagA, *offdiagA; 2369 PetscInt r; 2370 PetscErrorCode ierr; 2371 2372 PetscFunctionBegin; 2373 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2374 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2375 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, 
&offdiagV);CHKERRQ(ierr); 2376 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2377 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2378 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2379 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2380 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2381 for (r = 0; r < n; ++r) { 2382 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2383 a[r] = diagA[r]; 2384 idx[r] = cstart + diagIdx[r]; 2385 } else { 2386 a[r] = offdiagA[r]; 2387 idx[r] = cmap[offdiagIdx[r]]; 2388 } 2389 } 2390 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2391 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2392 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2393 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2394 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2395 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2396 PetscFunctionReturn(0); 2397 } 2398 2399 #undef __FUNCT__ 2400 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2401 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2402 { 2403 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2404 PetscInt n = A->rmap->n; 2405 PetscInt cstart = A->cmap->rstart; 2406 PetscInt *cmap = mat->garray; 2407 PetscInt *diagIdx, *offdiagIdx; 2408 Vec diagV, offdiagV; 2409 PetscScalar *a, *diagA, *offdiagA; 2410 PetscInt r; 2411 PetscErrorCode ierr; 2412 2413 PetscFunctionBegin; 2414 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2415 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2416 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2417 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2418 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2419 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2420 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2421 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2422 for (r = 0; r < n; ++r) { 2423 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 
2424 a[r] = diagA[r]; 2425 idx[r] = cstart + diagIdx[r]; 2426 } else { 2427 a[r] = offdiagA[r]; 2428 idx[r] = cmap[offdiagIdx[r]]; 2429 } 2430 } 2431 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2432 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2433 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2434 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2435 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2436 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2437 PetscFunctionReturn(0); 2438 } 2439 2440 #undef __FUNCT__ 2441 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2442 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2443 { 2444 PetscErrorCode ierr; 2445 Mat *dummy; 2446 2447 PetscFunctionBegin; 2448 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2449 *newmat = *dummy; 2450 ierr = PetscFree(dummy);CHKERRQ(ierr); 2451 PetscFunctionReturn(0); 2452 } 2453 2454 #undef __FUNCT__ 2455 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2456 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2457 { 2458 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2459 PetscErrorCode ierr; 2460 2461 PetscFunctionBegin; 2462 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2463 A->factorerrortype = a->A->factorerrortype; 2464 PetscFunctionReturn(0); 2465 } 2466 2467 #undef __FUNCT__ 2468 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2469 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2470 { 2471 PetscErrorCode ierr; 2472 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2473 2474 PetscFunctionBegin; 2475 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2476 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2477 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2478 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2479 PetscFunctionReturn(0); 2480 } 2481 2482 #undef __FUNCT__ 2483 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ" 
2484 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2485 { 2486 PetscFunctionBegin; 2487 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2488 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2489 PetscFunctionReturn(0); 2490 } 2491 2492 #undef __FUNCT__ 2493 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap" 2494 /*@ 2495 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2496 2497 Collective on Mat 2498 2499 Input Parameters: 2500 + A - the matrix 2501 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2502 2503 Level: advanced 2504 2505 @*/ 2506 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2507 { 2508 PetscErrorCode ierr; 2509 2510 PetscFunctionBegin; 2511 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2512 PetscFunctionReturn(0); 2513 } 2514 2515 #undef __FUNCT__ 2516 #define __FUNCT__ "MatSetFromOptions_MPIAIJ" 2517 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2518 { 2519 PetscErrorCode ierr; 2520 PetscBool sc = PETSC_FALSE,flg; 2521 2522 PetscFunctionBegin; 2523 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2524 ierr = PetscObjectOptionsBegin((PetscObject)A); 2525 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2526 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2527 if (flg) { 2528 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2529 } 2530 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 #undef __FUNCT__ 2535 #define __FUNCT__ "MatShift_MPIAIJ" 2536 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2537 { 2538 
PetscErrorCode ierr; 2539 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2540 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2541 2542 PetscFunctionBegin; 2543 if (!Y->preallocated) { 2544 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2545 } else if (!aij->nz) { 2546 PetscInt nonew = aij->nonew; 2547 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2548 aij->nonew = nonew; 2549 } 2550 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2551 PetscFunctionReturn(0); 2552 } 2553 2554 #undef __FUNCT__ 2555 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ" 2556 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2557 { 2558 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2559 PetscErrorCode ierr; 2560 2561 PetscFunctionBegin; 2562 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2563 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2564 if (d) { 2565 PetscInt rstart; 2566 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2567 *d += rstart; 2568 2569 } 2570 PetscFunctionReturn(0); 2571 } 2572 2573 2574 /* -------------------------------------------------------------------*/ 2575 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2576 MatGetRow_MPIAIJ, 2577 MatRestoreRow_MPIAIJ, 2578 MatMult_MPIAIJ, 2579 /* 4*/ MatMultAdd_MPIAIJ, 2580 MatMultTranspose_MPIAIJ, 2581 MatMultTransposeAdd_MPIAIJ, 2582 0, 2583 0, 2584 0, 2585 /*10*/ 0, 2586 0, 2587 0, 2588 MatSOR_MPIAIJ, 2589 MatTranspose_MPIAIJ, 2590 /*15*/ MatGetInfo_MPIAIJ, 2591 MatEqual_MPIAIJ, 2592 MatGetDiagonal_MPIAIJ, 2593 MatDiagonalScale_MPIAIJ, 2594 MatNorm_MPIAIJ, 2595 /*20*/ MatAssemblyBegin_MPIAIJ, 2596 MatAssemblyEnd_MPIAIJ, 2597 MatSetOption_MPIAIJ, 2598 MatZeroEntries_MPIAIJ, 2599 /*24*/ MatZeroRows_MPIAIJ, 2600 0, 2601 0, 2602 0, 2603 0, 2604 /*29*/ MatSetUp_MPIAIJ, 2605 0, 2606 0, 2607 MatGetDiagonalBlock_MPIAIJ, 2608 0, 2609 /*34*/ MatDuplicate_MPIAIJ, 2610 0, 2611 0, 2612 0, 2613 0, 2614 /*39*/ 
MatAXPY_MPIAIJ, 2615 MatGetSubMatrices_MPIAIJ, 2616 MatIncreaseOverlap_MPIAIJ, 2617 MatGetValues_MPIAIJ, 2618 MatCopy_MPIAIJ, 2619 /*44*/ MatGetRowMax_MPIAIJ, 2620 MatScale_MPIAIJ, 2621 MatShift_MPIAIJ, 2622 MatDiagonalSet_MPIAIJ, 2623 MatZeroRowsColumns_MPIAIJ, 2624 /*49*/ MatSetRandom_MPIAIJ, 2625 0, 2626 0, 2627 0, 2628 0, 2629 /*54*/ MatFDColoringCreate_MPIXAIJ, 2630 0, 2631 MatSetUnfactored_MPIAIJ, 2632 MatPermute_MPIAIJ, 2633 0, 2634 /*59*/ MatGetSubMatrix_MPIAIJ, 2635 MatDestroy_MPIAIJ, 2636 MatView_MPIAIJ, 2637 0, 2638 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2639 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2640 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2641 0, 2642 0, 2643 0, 2644 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2645 MatGetRowMinAbs_MPIAIJ, 2646 0, 2647 0, 2648 0, 2649 0, 2650 /*75*/ MatFDColoringApply_AIJ, 2651 MatSetFromOptions_MPIAIJ, 2652 0, 2653 0, 2654 MatFindZeroDiagonals_MPIAIJ, 2655 /*80*/ 0, 2656 0, 2657 0, 2658 /*83*/ MatLoad_MPIAIJ, 2659 0, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2665 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2666 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2667 MatPtAP_MPIAIJ_MPIAIJ, 2668 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2669 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2670 0, 2671 0, 2672 0, 2673 0, 2674 /*99*/ 0, 2675 0, 2676 0, 2677 MatConjugate_MPIAIJ, 2678 0, 2679 /*104*/MatSetValuesRow_MPIAIJ, 2680 MatRealPart_MPIAIJ, 2681 MatImaginaryPart_MPIAIJ, 2682 0, 2683 0, 2684 /*109*/0, 2685 0, 2686 MatGetRowMin_MPIAIJ, 2687 0, 2688 MatMissingDiagonal_MPIAIJ, 2689 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2690 0, 2691 MatGetGhosts_MPIAIJ, 2692 0, 2693 0, 2694 /*119*/0, 2695 0, 2696 0, 2697 0, 2698 MatGetMultiProcBlock_MPIAIJ, 2699 /*124*/MatFindNonzeroRows_MPIAIJ, 2700 MatGetColumnNorms_MPIAIJ, 2701 MatInvertBlockDiagonal_MPIAIJ, 2702 0, 2703 MatGetSubMatricesMPI_MPIAIJ, 2704 /*129*/0, 2705 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2706 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2707 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2708 0, 
2709 /*134*/0, 2710 0, 2711 0, 2712 0, 2713 0, 2714 /*139*/MatSetBlockSizes_MPIAIJ, 2715 0, 2716 0, 2717 MatFDColoringSetUp_MPIXAIJ, 2718 MatFindOffBlockDiagonalEntries_MPIAIJ, 2719 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2720 }; 2721 2722 /* ----------------------------------------------------------------------------------------*/ 2723 2724 #undef __FUNCT__ 2725 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2726 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2727 { 2728 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2729 PetscErrorCode ierr; 2730 2731 PetscFunctionBegin; 2732 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2733 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2734 PetscFunctionReturn(0); 2735 } 2736 2737 #undef __FUNCT__ 2738 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2739 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2740 { 2741 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2742 PetscErrorCode ierr; 2743 2744 PetscFunctionBegin; 2745 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2746 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2747 PetscFunctionReturn(0); 2748 } 2749 2750 #undef __FUNCT__ 2751 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2752 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2753 { 2754 Mat_MPIAIJ *b; 2755 PetscErrorCode ierr; 2756 2757 PetscFunctionBegin; 2758 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2759 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2760 b = (Mat_MPIAIJ*)B->data; 2761 2762 if (!B->preallocated) { 2763 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 2764 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2765 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2766 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2767 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2768 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2769 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2770 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2771 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2772 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2773 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2774 } 2775 2776 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2777 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2778 B->preallocated = PETSC_TRUE; 2779 PetscFunctionReturn(0); 2780 } 2781 2782 #undef __FUNCT__ 2783 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2784 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2785 { 2786 Mat mat; 2787 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2788 PetscErrorCode ierr; 2789 2790 PetscFunctionBegin; 2791 *newmat = 0; 2792 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2793 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2794 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2795 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2796 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2797 a = (Mat_MPIAIJ*)mat->data; 2798 2799 mat->factortype = matin->factortype; 2800 mat->assembled = PETSC_TRUE; 2801 mat->insertmode = NOT_SET_VALUES; 2802 mat->preallocated = PETSC_TRUE; 2803 2804 a->size = oldmat->size; 2805 a->rank = oldmat->rank; 2806 a->donotstash = oldmat->donotstash; 2807 a->roworiented = oldmat->roworiented; 2808 a->rowindices 
= 0; 2809 a->rowvalues = 0; 2810 a->getrowactive = PETSC_FALSE; 2811 2812 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2813 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2814 2815 if (oldmat->colmap) { 2816 #if defined(PETSC_USE_CTABLE) 2817 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2818 #else 2819 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2820 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2821 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2822 #endif 2823 } else a->colmap = 0; 2824 if (oldmat->garray) { 2825 PetscInt len; 2826 len = oldmat->B->cmap->n; 2827 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2828 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2829 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2830 } else a->garray = 0; 2831 2832 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2833 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2834 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2835 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2836 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2837 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2838 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2839 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2840 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2841 *newmat = mat; 2842 PetscFunctionReturn(0); 2843 } 2844 2845 2846 2847 #undef __FUNCT__ 2848 #define __FUNCT__ "MatLoad_MPIAIJ" 2849 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2850 { 2851 PetscScalar *vals,*svals; 2852 MPI_Comm comm; 2853 
PetscErrorCode ierr; 2854 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2855 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2856 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2857 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2858 PetscInt cend,cstart,n,*rowners; 2859 int fd; 2860 PetscInt bs = newMat->rmap->bs; 2861 2862 PetscFunctionBegin; 2863 /* force binary viewer to load .info file if it has not yet done so */ 2864 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2865 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2866 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2867 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2868 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2869 if (!rank) { 2870 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2871 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2872 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2873 } 2874 2875 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2876 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2877 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2878 if (bs < 0) bs = 1; 2879 2880 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2881 M = header[1]; N = header[2]; 2882 2883 /* If global sizes are set, check if they are consistent with that given in the file */ 2884 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2885 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has 
(%D) and input matrix has (%D)",newMat->cmap->N,N); 2886 2887 /* determine ownership of all (block) rows */ 2888 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2889 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2890 else m = newMat->rmap->n; /* Set by user */ 2891 2892 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2893 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2894 2895 /* First process needs enough room for process with most rows */ 2896 if (!rank) { 2897 mmax = rowners[1]; 2898 for (i=2; i<=size; i++) { 2899 mmax = PetscMax(mmax, rowners[i]); 2900 } 2901 } else mmax = -1; /* unused, but compilers complain */ 2902 2903 rowners[0] = 0; 2904 for (i=2; i<=size; i++) { 2905 rowners[i] += rowners[i-1]; 2906 } 2907 rstart = rowners[rank]; 2908 rend = rowners[rank+1]; 2909 2910 /* distribute row lengths to all processors */ 2911 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2912 if (!rank) { 2913 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2914 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2915 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2916 for (j=0; j<m; j++) { 2917 procsnz[0] += ourlens[j]; 2918 } 2919 for (i=1; i<size; i++) { 2920 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2921 /* calculate the number of nonzeros on each processor */ 2922 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2923 procsnz[i] += rowlengths[j]; 2924 } 2925 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2926 } 2927 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2928 } else { 2929 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2930 } 2931 2932 if (!rank) { 2933 /* determine max buffer needed and allocate it */ 2934 maxnz = 0; 2935 for (i=0; i<size; i++) { 2936 maxnz = 
PetscMax(maxnz,procsnz[i]); 2937 } 2938 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2939 2940 /* read in my part of the matrix column indices */ 2941 nz = procsnz[0]; 2942 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2943 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2944 2945 /* read in every one elses and ship off */ 2946 for (i=1; i<size; i++) { 2947 nz = procsnz[i]; 2948 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2949 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2950 } 2951 ierr = PetscFree(cols);CHKERRQ(ierr); 2952 } else { 2953 /* determine buffer space needed for message */ 2954 nz = 0; 2955 for (i=0; i<m; i++) { 2956 nz += ourlens[i]; 2957 } 2958 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2959 2960 /* receive message of column indices*/ 2961 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2962 } 2963 2964 /* determine column ownership if matrix is not square */ 2965 if (N != M) { 2966 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2967 else n = newMat->cmap->n; 2968 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2969 cstart = cend - n; 2970 } else { 2971 cstart = rstart; 2972 cend = rend; 2973 n = cend - cstart; 2974 } 2975 2976 /* loop over local rows, determining number of off diagonal entries */ 2977 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2978 jj = 0; 2979 for (i=0; i<m; i++) { 2980 for (j=0; j<ourlens[i]; j++) { 2981 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2982 jj++; 2983 } 2984 } 2985 2986 for (i=0; i<m; i++) { 2987 ourlens[i] -= offlens[i]; 2988 } 2989 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2990 2991 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2992 2993 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2994 2995 for (i=0; i<m; i++) { 2996 ourlens[i] += offlens[i]; 2997 } 2998 2999 if (!rank) { 3000 ierr = 
PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3001 3002 /* read in my part of the matrix numerical values */ 3003 nz = procsnz[0]; 3004 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3005 3006 /* insert into matrix */ 3007 jj = rstart; 3008 smycols = mycols; 3009 svals = vals; 3010 for (i=0; i<m; i++) { 3011 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3012 smycols += ourlens[i]; 3013 svals += ourlens[i]; 3014 jj++; 3015 } 3016 3017 /* read in other processors and ship out */ 3018 for (i=1; i<size; i++) { 3019 nz = procsnz[i]; 3020 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3021 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3022 } 3023 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3024 } else { 3025 /* receive numeric values */ 3026 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3027 3028 /* receive message of values*/ 3029 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3030 3031 /* insert into matrix */ 3032 jj = rstart; 3033 smycols = mycols; 3034 svals = vals; 3035 for (i=0; i<m; i++) { 3036 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3037 smycols += ourlens[i]; 3038 svals += ourlens[i]; 3039 jj++; 3040 } 3041 } 3042 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3043 ierr = PetscFree(vals);CHKERRQ(ierr); 3044 ierr = PetscFree(mycols);CHKERRQ(ierr); 3045 ierr = PetscFree(rowners);CHKERRQ(ierr); 3046 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3047 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3048 PetscFunctionReturn(0); 3049 } 3050 3051 #undef __FUNCT__ 3052 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3053 /* TODO: Not scalable because of ISAllGather() unless getting all columns. 
*/ 3054 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3055 { 3056 PetscErrorCode ierr; 3057 IS iscol_local; 3058 PetscInt csize; 3059 3060 PetscFunctionBegin; 3061 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3062 if (call == MAT_REUSE_MATRIX) { 3063 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3064 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3065 } else { 3066 /* check if we are grabbing all columns*/ 3067 PetscBool isstride; 3068 PetscMPIInt lisstride = 0,gisstride; 3069 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3070 if (isstride) { 3071 PetscInt start,len,mstart,mlen; 3072 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3073 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3074 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3075 if (mstart == start && mlen-mstart == len) lisstride = 1; 3076 } 3077 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3078 if (gisstride) { 3079 PetscInt N; 3080 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3081 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3082 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3083 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3084 } else { 3085 PetscInt cbs; 3086 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3087 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3088 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3089 } 3090 } 3091 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3092 if (call == MAT_INITIAL_MATRIX) { 3093 ierr = 
PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3094 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3095 } 3096 PetscFunctionReturn(0); 3097 } 3098 3099 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3100 #undef __FUNCT__ 3101 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3102 /* 3103 Not great since it makes two copies of the submatrix, first an SeqAIJ 3104 in local and then by concatenating the local matrices the end result. 3105 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3106 3107 Note: This requires a sequential iscol with all indices. 3108 */ 3109 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3110 { 3111 PetscErrorCode ierr; 3112 PetscMPIInt rank,size; 3113 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3114 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3115 PetscBool allcolumns, colflag; 3116 Mat M,Mreuse; 3117 MatScalar *vwork,*aa; 3118 MPI_Comm comm; 3119 Mat_SeqAIJ *aij; 3120 3121 PetscFunctionBegin; 3122 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3123 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3124 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3125 3126 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3127 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3128 if (colflag && ncol == mat->cmap->N) { 3129 allcolumns = PETSC_TRUE; 3130 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3131 } else { 3132 allcolumns = PETSC_FALSE; 3133 } 3134 if (call == MAT_REUSE_MATRIX) { 3135 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3136 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3137 ierr = 
MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3138 } else { 3139 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3140 } 3141 3142 /* 3143 m - number of local rows 3144 n - number of columns (same on all processors) 3145 rstart - first row in new global matrix generated 3146 */ 3147 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3148 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3149 if (call == MAT_INITIAL_MATRIX) { 3150 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3151 ii = aij->i; 3152 jj = aij->j; 3153 3154 /* 3155 Determine the number of non-zeros in the diagonal and off-diagonal 3156 portions of the matrix in order to do correct preallocation 3157 */ 3158 3159 /* first get start and end of "diagonal" columns */ 3160 if (csize == PETSC_DECIDE) { 3161 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3162 if (mglobal == n) { /* square matrix */ 3163 nlocal = m; 3164 } else { 3165 nlocal = n/size + ((n % size) > rank); 3166 } 3167 } else { 3168 nlocal = csize; 3169 } 3170 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3171 rstart = rend - nlocal; 3172 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3173 3174 /* next, compute all the lengths */ 3175 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3176 olens = dlens + m; 3177 for (i=0; i<m; i++) { 3178 jend = ii[i+1] - ii[i]; 3179 olen = 0; 3180 dlen = 0; 3181 for (j=0; j<jend; j++) { 3182 if (*jj < rstart || *jj >= rend) olen++; 3183 else dlen++; 3184 jj++; 3185 } 3186 olens[i] = olen; 3187 dlens[i] = dlen; 3188 } 3189 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3190 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3191 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3192 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3193 ierr = 
MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3194 ierr = PetscFree(dlens);CHKERRQ(ierr); 3195 } else { 3196 PetscInt ml,nl; 3197 3198 M = *newmat; 3199 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3200 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3201 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3202 /* 3203 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3204 rather than the slower MatSetValues(). 3205 */ 3206 M->was_assembled = PETSC_TRUE; 3207 M->assembled = PETSC_FALSE; 3208 } 3209 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3210 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3211 ii = aij->i; 3212 jj = aij->j; 3213 aa = aij->a; 3214 for (i=0; i<m; i++) { 3215 row = rstart + i; 3216 nz = ii[i+1] - ii[i]; 3217 cwork = jj; jj += nz; 3218 vwork = aa; aa += nz; 3219 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3220 } 3221 3222 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3223 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3224 *newmat = M; 3225 3226 /* save submatrix used in processor for next request */ 3227 if (call == MAT_INITIAL_MATRIX) { 3228 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3229 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3230 } 3231 PetscFunctionReturn(0); 3232 } 3233 3234 #undef __FUNCT__ 3235 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3236 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3237 { 3238 PetscInt m,cstart, cend,j,nnz,i,d; 3239 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3240 const PetscInt *JJ; 3241 PetscScalar *values; 3242 PetscErrorCode ierr; 3243 3244 PetscFunctionBegin; 3245 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3246 3247 ierr = 
PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3248 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3249 m = B->rmap->n; 3250 cstart = B->cmap->rstart; 3251 cend = B->cmap->rend; 3252 rstart = B->rmap->rstart; 3253 3254 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3255 3256 #if defined(PETSC_USE_DEBUGGING) 3257 for (i=0; i<m; i++) { 3258 nnz = Ii[i+1]- Ii[i]; 3259 JJ = J + Ii[i]; 3260 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3261 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3262 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3263 } 3264 #endif 3265 3266 for (i=0; i<m; i++) { 3267 nnz = Ii[i+1]- Ii[i]; 3268 JJ = J + Ii[i]; 3269 nnz_max = PetscMax(nnz_max,nnz); 3270 d = 0; 3271 for (j=0; j<nnz; j++) { 3272 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3273 } 3274 d_nnz[i] = d; 3275 o_nnz[i] = nnz - d; 3276 } 3277 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3278 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3279 3280 if (v) values = (PetscScalar*)v; 3281 else { 3282 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3283 } 3284 3285 for (i=0; i<m; i++) { 3286 ii = i + rstart; 3287 nnz = Ii[i+1]- Ii[i]; 3288 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3289 } 3290 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3291 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3292 3293 if (!v) { 3294 ierr = PetscFree(values);CHKERRQ(ierr); 3295 } 3296 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3297 PetscFunctionReturn(0); 3298 } 3299 3300 #undef __FUNCT__ 3301 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3302 /*@ 3303 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3304 (the default parallel PETSc format). 3305 3306 Collective on MPI_Comm 3307 3308 Input Parameters: 3309 + B - the matrix 3310 . i - the indices into j for the start of each local row (starts with zero) 3311 . j - the column indices for each local row (starts with zero) 3312 - v - optional values in the matrix 3313 3314 Level: developer 3315 3316 Notes: 3317 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3318 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3319 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3320 3321 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3322 3323 The format which is used for the sparse matrix input, is equivalent to a 3324 row-major ordering.. 
i.e for the following matrix, the input data expected is 3325 as shown 3326 3327 $ 1 0 0 3328 $ 2 0 3 P0 3329 $ ------- 3330 $ 4 5 6 P1 3331 $ 3332 $ Process0 [P0]: rows_owned=[0,1] 3333 $ i = {0,1,3} [size = nrow+1 = 2+1] 3334 $ j = {0,0,2} [size = 3] 3335 $ v = {1,2,3} [size = 3] 3336 $ 3337 $ Process1 [P1]: rows_owned=[2] 3338 $ i = {0,3} [size = nrow+1 = 1+1] 3339 $ j = {0,1,2} [size = 3] 3340 $ v = {4,5,6} [size = 3] 3341 3342 .keywords: matrix, aij, compressed row, sparse, parallel 3343 3344 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3345 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3346 @*/ 3347 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3348 { 3349 PetscErrorCode ierr; 3350 3351 PetscFunctionBegin; 3352 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3353 PetscFunctionReturn(0); 3354 } 3355 3356 #undef __FUNCT__ 3357 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3358 /*@C 3359 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3360 (the default parallel PETSc format). For good matrix assembly performance 3361 the user should preallocate the matrix storage by setting the parameters 3362 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3363 performance can be increased by more than a factor of 50. 3364 3365 Collective on MPI_Comm 3366 3367 Input Parameters: 3368 + B - the matrix 3369 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3370 (same value is used for all local rows) 3371 . 
d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.
In the 3402 common case of a square matrix, the row and column ranges are the same and 3403 the DIAGONAL part is also square. The remaining portion of the local 3404 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3405 3406 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3407 3408 You can call MatGetInfo() to get information on how effective the preallocation was; 3409 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3410 You can also run with the option -info and look for messages with the string 3411 malloc in them to see if additional memory allocation was needed. 3412 3413 Example usage: 3414 3415 Consider the following 8x8 matrix with 34 non-zero values, that is 3416 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3417 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3418 as follows: 3419 3420 .vb 3421 1 2 0 | 0 3 0 | 0 4 3422 Proc0 0 5 6 | 7 0 0 | 8 0 3423 9 0 10 | 11 0 0 | 12 0 3424 ------------------------------------- 3425 13 0 14 | 15 16 17 | 0 0 3426 Proc1 0 18 0 | 19 20 21 | 0 0 3427 0 0 0 | 22 23 0 | 24 0 3428 ------------------------------------- 3429 Proc2 25 26 27 | 0 0 28 | 29 0 3430 30 0 0 | 31 32 33 | 0 34 3431 .ve 3432 3433 This can be represented as a collection of submatrices as: 3434 3435 .vb 3436 A B C 3437 D E F 3438 G H I 3439 .ve 3440 3441 Where the submatrices A,B,C are owned by proc0, D,E,F are 3442 owned by proc1, G,H,I are owned by proc2. 3443 3444 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3445 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3446 The 'M','N' parameters are 8,8, and have the same values on all procs. 3447 3448 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3449 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3450 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
3481 3482 Level: intermediate 3483 3484 .keywords: matrix, aij, compressed row, sparse, parallel 3485 3486 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3487 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 3488 @*/ 3489 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3490 { 3491 PetscErrorCode ierr; 3492 3493 PetscFunctionBegin; 3494 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3495 PetscValidType(B,1); 3496 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3497 PetscFunctionReturn(0); 3498 } 3499 3500 #undef __FUNCT__ 3501 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3502 /*@ 3503 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3504 CSR format the local rows. 3505 3506 Collective on MPI_Comm 3507 3508 Input Parameters: 3509 + comm - MPI communicator 3510 . m - number of local rows (Cannot be PETSC_DECIDE) 3511 . n - This value should be the same as the local size used in creating the 3512 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3513 calculated if N is given) For square matrices n is almost always m. 3514 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3515 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3516 . i - row indices 3517 . j - column indices 3518 - a - matrix values 3519 3520 Output Parameter: 3521 . mat - the matrix 3522 3523 Level: intermediate 3524 3525 Notes: 3526 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3527 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3528 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
3529 3530 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3531 3532 The format which is used for the sparse matrix input, is equivalent to a 3533 row-major ordering.. i.e for the following matrix, the input data expected is 3534 as shown 3535 3536 $ 1 0 0 3537 $ 2 0 3 P0 3538 $ ------- 3539 $ 4 5 6 P1 3540 $ 3541 $ Process0 [P0]: rows_owned=[0,1] 3542 $ i = {0,1,3} [size = nrow+1 = 2+1] 3543 $ j = {0,0,2} [size = 3] 3544 $ v = {1,2,3} [size = 3] 3545 $ 3546 $ Process1 [P1]: rows_owned=[2] 3547 $ i = {0,3} [size = nrow+1 = 1+1] 3548 $ j = {0,1,2} [size = 3] 3549 $ v = {4,5,6} [size = 3] 3550 3551 .keywords: matrix, aij, compressed row, sparse, parallel 3552 3553 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3554 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3555 @*/ 3556 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3557 { 3558 PetscErrorCode ierr; 3559 3560 PetscFunctionBegin; 3561 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3562 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3563 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3564 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3565 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3566 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3567 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3568 PetscFunctionReturn(0); 3569 } 3570 3571 #undef __FUNCT__ 3572 #define __FUNCT__ "MatCreateAIJ" 3573 /*@C 3574 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3575 (the default parallel PETSc format). 
For good matrix assembly performance 3576 the user should preallocate the matrix storage by setting the parameters 3577 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3578 performance can be increased by more than a factor of 50. 3579 3580 Collective on MPI_Comm 3581 3582 Input Parameters: 3583 + comm - MPI communicator 3584 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3585 This value should be the same as the local size used in creating the 3586 y vector for the matrix-vector product y = Ax. 3587 . n - This value should be the same as the local size used in creating the 3588 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3589 calculated if N is given) For square matrices n is almost always m. 3590 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3591 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3592 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3593 (same value is used for all local rows) 3594 . d_nnz - array containing the number of nonzeros in the various rows of the 3595 DIAGONAL portion of the local submatrix (possibly different for each row) 3596 or NULL, if d_nz is used to specify the nonzero structure. 3597 The size of this array is equal to the number of local rows, i.e 'm'. 3598 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3599 submatrix (same value is used for all local rows). 3600 - o_nnz - array containing the number of nonzeros in the various rows of the 3601 OFF-DIAGONAL portion of the local submatrix (possibly different for 3602 each row) or NULL, if o_nz is used to specify the nonzero 3603 structure. The size of this array is equal to the number 3604 of local rows, i.e 'm'. 3605 3606 Output Parameter: 3607 . 
A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitute the OFF-DIAGONAL portion. The example below better
   illustrates this concept.
3644 3645 For a square global matrix we define each processor's diagonal portion 3646 to be its local rows and the corresponding columns (a square submatrix); 3647 each processor's off-diagonal portion encompasses the remainder of the 3648 local matrix (a rectangular submatrix). 3649 3650 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3651 3652 When calling this routine with a single process communicator, a matrix of 3653 type SEQAIJ is returned. If a matrix of type MATMPIAIJ is desired for this 3654 type of communicator, use the construction mechanism: 3655 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3656 3657 By default, this format uses inodes (identical nodes) when possible. 3658 We search for consecutive rows with the same nonzero structure, thereby 3659 reusing matrix information to achieve increased efficiency. 3660 3661 Options Database Keys: 3662 + -mat_no_inode - Do not use inodes 3663 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3664 - -mat_aij_oneindex - Internally use indexing starting at 1 3665 rather than 0. Note that when calling MatSetValues(), 3666 the user still MUST index entries starting at 0! 3667 3668 3669 Example usage: 3670 3671 Consider the following 8x8 matrix with 34 non-zero values, that is 3672 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3673 proc1 owns 3 rows, proc2 owns 2 rows. 
   This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store 3725 34 values. 3726 3727 When d_nnz, o_nnz parameters are specified, the storage is specified 3728 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3729 In the above case the values for d_nnz,o_nnz are: 3730 .vb 3731 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3732 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3733 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3734 .ve 3735 Here the space allocated is sum of all the above values i.e 34, and 3736 hence pre-allocation is perfect. 3737 3738 Level: intermediate 3739 3740 .keywords: matrix, aij, compressed row, sparse, parallel 3741 3742 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3743 MATMPIAIJ, MatCreateMPIAIJWithArrays() 3744 @*/ 3745 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3746 { 3747 PetscErrorCode ierr; 3748 PetscMPIInt size; 3749 3750 PetscFunctionBegin; 3751 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3752 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3753 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3754 if (size > 1) { 3755 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3756 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3757 } else { 3758 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3759 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3760 } 3761 PetscFunctionReturn(0); 3762 } 3763 3764 #undef __FUNCT__ 3765 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3766 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3767 { 3768 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3769 PetscBool flg; 3770 PetscErrorCode ierr; 3771 3772 PetscFunctionBegin; 3773 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 3774 if (!flg) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 3775 if (Ad) *Ad = a->A; 3776 if (Ao) *Ao = a->B; 3777 if (colmap) *colmap = a->garray; 3778 PetscFunctionReturn(0); 3779 } 3780 3781 #undef __FUNCT__ 3782 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3783 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3784 { 3785 PetscErrorCode ierr; 3786 PetscInt m,N,i,rstart,nnz,Ii; 3787 PetscInt *indx; 3788 PetscScalar *values; 3789 3790 PetscFunctionBegin; 3791 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3792 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3793 PetscInt *dnz,*onz,sum,bs,cbs; 3794 3795 if (n == PETSC_DECIDE) { 3796 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3797 } 3798 /* Check sum(n) = N */ 3799 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3800 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3801 3802 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3803 rstart -= m; 3804 3805 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3806 for (i=0; i<m; i++) { 3807 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3808 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3809 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3810 } 3811 3812 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3813 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3814 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3815 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3816 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3817 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3818 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3819 } 3820 3821 /* numeric phase */ 3822 ierr = 
MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3823 for (i=0; i<m; i++) { 3824 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3825 Ii = i + rstart; 3826 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3827 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3828 } 3829 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3830 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3831 PetscFunctionReturn(0); 3832 } 3833 3834 #undef __FUNCT__ 3835 #define __FUNCT__ "MatFileSplit" 3836 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3837 { 3838 PetscErrorCode ierr; 3839 PetscMPIInt rank; 3840 PetscInt m,N,i,rstart,nnz; 3841 size_t len; 3842 const PetscInt *indx; 3843 PetscViewer out; 3844 char *name; 3845 Mat B; 3846 const PetscScalar *values; 3847 3848 PetscFunctionBegin; 3849 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3850 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3851 /* Should this be the type of the diagonal block of A? 
*/ 3852 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3853 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3854 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3855 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3856 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3857 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3858 for (i=0; i<m; i++) { 3859 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3860 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3861 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3862 } 3863 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3864 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3865 3866 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3867 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3868 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3869 sprintf(name,"%s.%d",outfile,rank); 3870 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3871 ierr = PetscFree(name);CHKERRQ(ierr); 3872 ierr = MatView(B,out);CHKERRQ(ierr); 3873 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3874 ierr = MatDestroy(&B);CHKERRQ(ierr); 3875 PetscFunctionReturn(0); 3876 } 3877 3878 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3879 #undef __FUNCT__ 3880 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3881 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3882 { 3883 PetscErrorCode ierr; 3884 Mat_Merge_SeqsToMPI *merge; 3885 PetscContainer container; 3886 3887 PetscFunctionBegin; 3888 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3889 if (container) { 3890 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3891 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3892 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3893 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3894 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3895 ierr = 
PetscFree(merge->bj);CHKERRQ(ierr); 3896 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3897 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3898 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3899 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3900 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3901 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3902 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3903 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3904 ierr = PetscFree(merge);CHKERRQ(ierr); 3905 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3906 } 3907 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3908 PetscFunctionReturn(0); 3909 } 3910 3911 #include <../src/mat/utils/freespace.h> 3912 #include <petscbt.h> 3913 3914 #undef __FUNCT__ 3915 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3916 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3917 { 3918 PetscErrorCode ierr; 3919 MPI_Comm comm; 3920 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3921 PetscMPIInt size,rank,taga,*len_s; 3922 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3923 PetscInt proc,m; 3924 PetscInt **buf_ri,**buf_rj; 3925 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3926 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3927 MPI_Request *s_waits,*r_waits; 3928 MPI_Status *status; 3929 MatScalar *aa=a->a; 3930 MatScalar **abuf_r,*ba_i; 3931 Mat_Merge_SeqsToMPI *merge; 3932 PetscContainer container; 3933 3934 PetscFunctionBegin; 3935 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3936 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3937 3938 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3939 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3940 3941 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3942 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3943 3944 bi = merge->bi; 3945 bj = merge->bj; 3946 buf_ri 
= merge->buf_ri; 3947 buf_rj = merge->buf_rj; 3948 3949 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 3950 owners = merge->rowmap->range; 3951 len_s = merge->len_s; 3952 3953 /* send and recv matrix values */ 3954 /*-----------------------------*/ 3955 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3956 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3957 3958 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 3959 for (proc=0,k=0; proc<size; proc++) { 3960 if (!len_s[proc]) continue; 3961 i = owners[proc]; 3962 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3963 k++; 3964 } 3965 3966 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3967 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3968 ierr = PetscFree(status);CHKERRQ(ierr); 3969 3970 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3971 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3972 3973 /* insert mat values of mpimat */ 3974 /*----------------------------*/ 3975 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 3976 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 3977 3978 for (k=0; k<merge->nrecv; k++) { 3979 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3980 nrows = *(buf_ri_k[k]); 3981 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3982 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 3983 } 3984 3985 /* set values of ba */ 3986 m = merge->rowmap->n; 3987 for (i=0; i<m; i++) { 3988 arow = owners[rank] + i; 3989 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3990 bnzi = bi[i+1] - bi[i]; 3991 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 3992 3993 /* add local non-zero vals of this proc's seqmat into ba */ 3994 anzi = 
ai[arow+1] - ai[arow]; 3995 aj = a->j + ai[arow]; 3996 aa = a->a + ai[arow]; 3997 nextaj = 0; 3998 for (j=0; nextaj<anzi; j++) { 3999 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4000 ba_i[j] += aa[nextaj++]; 4001 } 4002 } 4003 4004 /* add received vals into ba */ 4005 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4006 /* i-th row */ 4007 if (i == *nextrow[k]) { 4008 anzi = *(nextai[k]+1) - *nextai[k]; 4009 aj = buf_rj[k] + *(nextai[k]); 4010 aa = abuf_r[k] + *(nextai[k]); 4011 nextaj = 0; 4012 for (j=0; nextaj<anzi; j++) { 4013 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4014 ba_i[j] += aa[nextaj++]; 4015 } 4016 } 4017 nextrow[k]++; nextai[k]++; 4018 } 4019 } 4020 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4021 } 4022 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4023 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4024 4025 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4026 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4027 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4028 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4029 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4030 PetscFunctionReturn(0); 4031 } 4032 4033 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4034 4035 #undef __FUNCT__ 4036 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4037 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4038 { 4039 PetscErrorCode ierr; 4040 Mat B_mpi; 4041 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4042 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4043 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4044 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4045 PetscInt len,proc,*dnz,*onz,bs,cbs; 4046 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4047 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4048 MPI_Request 
*si_waits,*sj_waits,*ri_waits,*rj_waits; 4049 MPI_Status *status; 4050 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4051 PetscBT lnkbt; 4052 Mat_Merge_SeqsToMPI *merge; 4053 PetscContainer container; 4054 4055 PetscFunctionBegin; 4056 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4057 4058 /* make sure it is a PETSc comm */ 4059 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4060 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4061 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4062 4063 ierr = PetscNew(&merge);CHKERRQ(ierr); 4064 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4065 4066 /* determine row ownership */ 4067 /*---------------------------------------------------------*/ 4068 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4069 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4070 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4071 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4072 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4073 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4074 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4075 4076 m = merge->rowmap->n; 4077 owners = merge->rowmap->range; 4078 4079 /* determine the number of messages to send, their lengths */ 4080 /*---------------------------------------------------------*/ 4081 len_s = merge->len_s; 4082 4083 len = 0; /* length of buf_si[] */ 4084 merge->nsend = 0; 4085 for (proc=0; proc<size; proc++) { 4086 len_si[proc] = 0; 4087 if (proc == rank) { 4088 len_s[proc] = 0; 4089 } else { 4090 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4091 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4092 } 4093 if (len_s[proc]) { 4094 merge->nsend++; 4095 nrows = 0; 4096 for (i=owners[proc]; i<owners[proc+1]; i++) { 4097 if (ai[i+1] > ai[i]) nrows++; 4098 } 4099 len_si[proc] = 2*(nrows+1); 4100 len += len_si[proc]; 4101 } 4102 } 4103 4104 
/* determine the number and length of messages to receive for ij-structure */ 4105 /*-------------------------------------------------------------------------*/ 4106 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4107 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4108 4109 /* post the Irecv of j-structure */ 4110 /*-------------------------------*/ 4111 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4112 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4113 4114 /* post the Isend of j-structure */ 4115 /*--------------------------------*/ 4116 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4117 4118 for (proc=0, k=0; proc<size; proc++) { 4119 if (!len_s[proc]) continue; 4120 i = owners[proc]; 4121 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4122 k++; 4123 } 4124 4125 /* receives and sends of j-structure are complete */ 4126 /*------------------------------------------------*/ 4127 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4128 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4129 4130 /* send and recv i-structure */ 4131 /*---------------------------*/ 4132 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4133 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4134 4135 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4136 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4137 for (proc=0,k=0; proc<size; proc++) { 4138 if (!len_s[proc]) continue; 4139 /* form outgoing message for i-structure: 4140 buf_si[0]: nrows to be sent 4141 [1:nrows]: row index (global) 4142 [nrows+1:2*nrows+1]: i-structure index 4143 */ 4144 /*-------------------------------------------*/ 
4145 nrows = len_si[proc]/2 - 1; 4146 buf_si_i = buf_si + nrows+1; 4147 buf_si[0] = nrows; 4148 buf_si_i[0] = 0; 4149 nrows = 0; 4150 for (i=owners[proc]; i<owners[proc+1]; i++) { 4151 anzi = ai[i+1] - ai[i]; 4152 if (anzi) { 4153 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4154 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4155 nrows++; 4156 } 4157 } 4158 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4159 k++; 4160 buf_si += len_si[proc]; 4161 } 4162 4163 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4164 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4165 4166 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4167 for (i=0; i<merge->nrecv; i++) { 4168 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4169 } 4170 4171 ierr = PetscFree(len_si);CHKERRQ(ierr); 4172 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4173 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4174 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4175 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4176 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4177 ierr = PetscFree(status);CHKERRQ(ierr); 4178 4179 /* compute a local seq matrix in each processor */ 4180 /*----------------------------------------------*/ 4181 /* allocate bi array and free space for accumulating nonzero column info */ 4182 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4183 bi[0] = 0; 4184 4185 /* create and initialize a linked list */ 4186 nlnk = N+1; 4187 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4188 4189 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4190 len = ai[owners[rank+1]] - ai[owners[rank]]; 4191 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4192 4193 current_space = free_space; 4194 4195 /* determine symbolic info for each 
local row */ 4196 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4197 4198 for (k=0; k<merge->nrecv; k++) { 4199 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4200 nrows = *buf_ri_k[k]; 4201 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4202 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4203 } 4204 4205 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4206 len = 0; 4207 for (i=0; i<m; i++) { 4208 bnzi = 0; 4209 /* add local non-zero cols of this proc's seqmat into lnk */ 4210 arow = owners[rank] + i; 4211 anzi = ai[arow+1] - ai[arow]; 4212 aj = a->j + ai[arow]; 4213 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4214 bnzi += nlnk; 4215 /* add received col data into lnk */ 4216 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4217 if (i == *nextrow[k]) { /* i-th row */ 4218 anzi = *(nextai[k]+1) - *nextai[k]; 4219 aj = buf_rj[k] + *nextai[k]; 4220 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4221 bnzi += nlnk; 4222 nextrow[k]++; nextai[k]++; 4223 } 4224 } 4225 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4226 4227 /* if free space is not available, make more free space */ 4228 if (current_space->local_remaining<bnzi) { 4229 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4230 nspacedouble++; 4231 } 4232 /* copy data into free space, then initialize lnk */ 4233 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4234 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4235 4236 current_space->array += bnzi; 4237 current_space->local_used += bnzi; 4238 current_space->local_remaining -= bnzi; 4239 4240 bi[i+1] = bi[i] + bnzi; 4241 } 4242 4243 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4244 4245 ierr = 
PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4246 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4247 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4248 4249 /* create symbolic parallel matrix B_mpi */ 4250 /*---------------------------------------*/ 4251 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4252 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4253 if (n==PETSC_DECIDE) { 4254 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4255 } else { 4256 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4257 } 4258 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4259 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4260 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4261 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4262 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4263 4264 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4265 B_mpi->assembled = PETSC_FALSE; 4266 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4267 merge->bi = bi; 4268 merge->bj = bj; 4269 merge->buf_ri = buf_ri; 4270 merge->buf_rj = buf_rj; 4271 merge->coi = NULL; 4272 merge->coj = NULL; 4273 merge->owners_co = NULL; 4274 4275 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4276 4277 /* attach the supporting struct to B_mpi for reuse */ 4278 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4279 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4280 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4281 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4282 *mpimat = B_mpi; 4283 4284 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4285 PetscFunctionReturn(0); 4286 } 4287 4288 #undef __FUNCT__ 4289 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4290 /*@C 4291 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ 
matrix by adding sequential 4292 matrices from each processor 4293 4294 Collective on MPI_Comm 4295 4296 Input Parameters: 4297 + comm - the communicators the parallel matrix will live on 4298 . seqmat - the input sequential matrices 4299 . m - number of local rows (or PETSC_DECIDE) 4300 . n - number of local columns (or PETSC_DECIDE) 4301 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4302 4303 Output Parameter: 4304 . mpimat - the parallel matrix generated 4305 4306 Level: advanced 4307 4308 Notes: 4309 The dimensions of the sequential matrix in each processor MUST be the same. 4310 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4311 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4312 @*/ 4313 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4314 { 4315 PetscErrorCode ierr; 4316 PetscMPIInt size; 4317 4318 PetscFunctionBegin; 4319 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4320 if (size == 1) { 4321 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4322 if (scall == MAT_INITIAL_MATRIX) { 4323 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4324 } else { 4325 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4326 } 4327 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4328 PetscFunctionReturn(0); 4329 } 4330 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4331 if (scall == MAT_INITIAL_MATRIX) { 4332 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4333 } 4334 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4335 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4336 PetscFunctionReturn(0); 4337 } 4338 4339 #undef __FUNCT__ 4340 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4341 /*@ 4342 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its 
local rows and putting them into a sequential vector with 4343 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4344 with MatGetSize() 4345 4346 Not Collective 4347 4348 Input Parameters: 4349 + A - the matrix 4350 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4351 4352 Output Parameter: 4353 . A_loc - the local sequential matrix generated 4354 4355 Level: developer 4356 4357 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4358 4359 @*/ 4360 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4361 { 4362 PetscErrorCode ierr; 4363 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4364 Mat_SeqAIJ *mat,*a,*b; 4365 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4366 MatScalar *aa,*ba,*cam; 4367 PetscScalar *ca; 4368 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4369 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4370 PetscBool match; 4371 MPI_Comm comm; 4372 PetscMPIInt size; 4373 4374 PetscFunctionBegin; 4375 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4376 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4377 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4378 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4379 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4380 4381 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4382 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4383 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4384 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4385 aa = a->a; ba = b->a; 4386 if (scall == MAT_INITIAL_MATRIX) { 4387 if (size == 1) { 4388 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4389 PetscFunctionReturn(0); 4390 } 4391 4392 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4393 ci[0] = 0; 4394 for (i=0; i<am; i++) { 4395 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + 
(bi[i+1] - bi[i]); 4396 } 4397 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4398 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4399 k = 0; 4400 for (i=0; i<am; i++) { 4401 ncols_o = bi[i+1] - bi[i]; 4402 ncols_d = ai[i+1] - ai[i]; 4403 /* off-diagonal portion of A */ 4404 for (jo=0; jo<ncols_o; jo++) { 4405 col = cmap[*bj]; 4406 if (col >= cstart) break; 4407 cj[k] = col; bj++; 4408 ca[k++] = *ba++; 4409 } 4410 /* diagonal portion of A */ 4411 for (j=0; j<ncols_d; j++) { 4412 cj[k] = cstart + *aj++; 4413 ca[k++] = *aa++; 4414 } 4415 /* off-diagonal portion of A */ 4416 for (j=jo; j<ncols_o; j++) { 4417 cj[k] = cmap[*bj++]; 4418 ca[k++] = *ba++; 4419 } 4420 } 4421 /* put together the new matrix */ 4422 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4423 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4424 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4425 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4426 mat->free_a = PETSC_TRUE; 4427 mat->free_ij = PETSC_TRUE; 4428 mat->nonew = 0; 4429 } else if (scall == MAT_REUSE_MATRIX) { 4430 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4431 ci = mat->i; cj = mat->j; cam = mat->a; 4432 for (i=0; i<am; i++) { 4433 /* off-diagonal portion of A */ 4434 ncols_o = bi[i+1] - bi[i]; 4435 for (jo=0; jo<ncols_o; jo++) { 4436 col = cmap[*bj]; 4437 if (col >= cstart) break; 4438 *cam++ = *ba++; bj++; 4439 } 4440 /* diagonal portion of A */ 4441 ncols_d = ai[i+1] - ai[i]; 4442 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4443 /* off-diagonal portion of A */ 4444 for (j=jo; j<ncols_o; j++) { 4445 *cam++ = *ba++; bj++; 4446 } 4447 } 4448 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4449 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4450 PetscFunctionReturn(0); 4451 } 4452 4453 #undef __FUNCT__ 4454 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4455 /*@C 4456 
MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4457 4458 Not Collective 4459 4460 Input Parameters: 4461 + A - the matrix 4462 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4463 - row, col - index sets of rows and columns to extract (or NULL) 4464 4465 Output Parameter: 4466 . A_loc - the local sequential matrix generated 4467 4468 Level: developer 4469 4470 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4471 4472 @*/ 4473 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4474 { 4475 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4476 PetscErrorCode ierr; 4477 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4478 IS isrowa,iscola; 4479 Mat *aloc; 4480 PetscBool match; 4481 4482 PetscFunctionBegin; 4483 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4484 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4485 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4486 if (!row) { 4487 start = A->rmap->rstart; end = A->rmap->rend; 4488 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4489 } else { 4490 isrowa = *row; 4491 } 4492 if (!col) { 4493 start = A->cmap->rstart; 4494 cmap = a->garray; 4495 nzA = a->A->cmap->n; 4496 nzB = a->B->cmap->n; 4497 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4498 ncols = 0; 4499 for (i=0; i<nzB; i++) { 4500 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4501 else break; 4502 } 4503 imark = i; 4504 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4505 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4506 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4507 } else { 4508 iscola = *col; 4509 } 4510 if (scall != MAT_INITIAL_MATRIX) { 4511 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4512 aloc[0] = *A_loc; 4513 } 4514 
ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4515 *A_loc = aloc[0]; 4516 ierr = PetscFree(aloc);CHKERRQ(ierr); 4517 if (!row) { 4518 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4519 } 4520 if (!col) { 4521 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4522 } 4523 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4524 PetscFunctionReturn(0); 4525 } 4526 4527 #undef __FUNCT__ 4528 #define __FUNCT__ "MatGetBrowsOfAcols" 4529 /*@C 4530 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4531 4532 Collective on Mat 4533 4534 Input Parameters: 4535 + A,B - the matrices in mpiaij format 4536 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4537 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4538 4539 Output Parameter: 4540 + rowb, colb - index sets of rows and columns of B to extract 4541 - B_seq - the sequential matrix generated 4542 4543 Level: developer 4544 4545 @*/ 4546 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4547 { 4548 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4549 PetscErrorCode ierr; 4550 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4551 IS isrowb,iscolb; 4552 Mat *bseq=NULL; 4553 4554 PetscFunctionBegin; 4555 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4556 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4557 } 4558 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4559 4560 if (scall == MAT_INITIAL_MATRIX) { 4561 start = A->cmap->rstart; 4562 cmap = a->garray; 4563 nzA = a->A->cmap->n; 4564 nzB = a->B->cmap->n; 4565 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4566 ncols = 0; 4567 for (i=0; i<nzB; i++) { /* row < local row index */ 4568 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4569 else break; 4570 } 4571 
imark = i; 4572 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4573 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4574 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4575 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4576 } else { 4577 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4578 isrowb = *rowb; iscolb = *colb; 4579 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 4580 bseq[0] = *B_seq; 4581 } 4582 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4583 *B_seq = bseq[0]; 4584 ierr = PetscFree(bseq);CHKERRQ(ierr); 4585 if (!rowb) { 4586 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4587 } else { 4588 *rowb = isrowb; 4589 } 4590 if (!colb) { 4591 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4592 } else { 4593 *colb = iscolb; 4594 } 4595 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4596 PetscFunctionReturn(0); 4597 } 4598 4599 #undef __FUNCT__ 4600 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4601 /* 4602 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4603 of the OFF-DIAGONAL portion of local A 4604 4605 Collective on Mat 4606 4607 Input Parameters: 4608 + A,B - the matrices in mpiaij format 4609 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4610 4611 Output Parameter: 4612 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4613 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4614 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4615 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4616 4617 Level: developer 4618 4619 */ 4620 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4621 { 4622 VecScatter_MPI_General *gen_to,*gen_from; 4623 PetscErrorCode ierr; 4624 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4625 Mat_SeqAIJ *b_oth; 4626 VecScatter ctx =a->Mvctx; 4627 MPI_Comm comm; 4628 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4629 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4630 PetscScalar *rvalues,*svalues; 4631 MatScalar *b_otha,*bufa,*bufA; 4632 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4633 MPI_Request *rwaits = NULL,*swaits = NULL; 4634 MPI_Status *sstatus,rstatus; 4635 PetscMPIInt jj,size; 4636 PetscInt *cols,sbs,rbs; 4637 PetscScalar *vals; 4638 4639 PetscFunctionBegin; 4640 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4641 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4642 4643 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4644 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4645 } 4646 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4647 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4648 4649 gen_to = (VecScatter_MPI_General*)ctx->todata; 4650 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4651 rvalues = gen_from->values; /* holds the length of receiving row */ 4652 svalues = gen_to->values; /* holds the length of sending row */ 4653 nrecvs = gen_from->n; 4654 nsends = gen_to->n; 4655 4656 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4657 srow = gen_to->indices; /* local row 
index to be sent */ 4658 sstarts = gen_to->starts; 4659 sprocs = gen_to->procs; 4660 sstatus = gen_to->sstatus; 4661 sbs = gen_to->bs; 4662 rstarts = gen_from->starts; 4663 rprocs = gen_from->procs; 4664 rbs = gen_from->bs; 4665 4666 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4667 if (scall == MAT_INITIAL_MATRIX) { 4668 /* i-array */ 4669 /*---------*/ 4670 /* post receives */ 4671 for (i=0; i<nrecvs; i++) { 4672 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4673 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4674 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4675 } 4676 4677 /* pack the outgoing message */ 4678 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4679 4680 sstartsj[0] = 0; 4681 rstartsj[0] = 0; 4682 len = 0; /* total length of j or a array to be sent */ 4683 k = 0; 4684 for (i=0; i<nsends; i++) { 4685 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4686 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4687 for (j=0; j<nrows; j++) { 4688 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4689 for (l=0; l<sbs; l++) { 4690 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4691 4692 rowlen[j*sbs+l] = ncols; 4693 4694 len += ncols; 4695 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4696 } 4697 k++; 4698 } 4699 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4700 4701 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4702 } 4703 /* recvs and sends of i-array are completed */ 4704 i = nrecvs; 4705 while (i--) { 4706 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4707 } 4708 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4709 4710 /* allocate buffers for sending j and a arrays */ 4711 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 4712 ierr = 
PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 4713 4714 /* create i-array of B_oth */ 4715 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 4716 4717 b_othi[0] = 0; 4718 len = 0; /* total length of j or a array to be received */ 4719 k = 0; 4720 for (i=0; i<nrecvs; i++) { 4721 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4722 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4723 for (j=0; j<nrows; j++) { 4724 b_othi[k+1] = b_othi[k] + rowlen[j]; 4725 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 4726 k++; 4727 } 4728 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4729 } 4730 4731 /* allocate space for j and a arrrays of B_oth */ 4732 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 4733 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 4734 4735 /* j-array */ 4736 /*---------*/ 4737 /* post receives of j-array */ 4738 for (i=0; i<nrecvs; i++) { 4739 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4740 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4741 } 4742 4743 /* pack the outgoing message j-array */ 4744 k = 0; 4745 for (i=0; i<nsends; i++) { 4746 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4747 bufJ = bufj+sstartsj[i]; 4748 for (j=0; j<nrows; j++) { 4749 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4750 for (ll=0; ll<sbs; ll++) { 4751 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4752 for (l=0; l<ncols; l++) { 4753 *bufJ++ = cols[l]; 4754 } 4755 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4756 } 4757 } 4758 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4759 } 4760 4761 /* recvs and sends of j-array are completed */ 4762 i = nrecvs; 4763 while (i--) { 4764 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4765 } 4766 if (nsends) {ierr = 
MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4767 } else if (scall == MAT_REUSE_MATRIX) { 4768 sstartsj = *startsj_s; 4769 rstartsj = *startsj_r; 4770 bufa = *bufa_ptr; 4771 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4772 b_otha = b_oth->a; 4773 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4774 4775 /* a-array */ 4776 /*---------*/ 4777 /* post receives of a-array */ 4778 for (i=0; i<nrecvs; i++) { 4779 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4780 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4781 } 4782 4783 /* pack the outgoing message a-array */ 4784 k = 0; 4785 for (i=0; i<nsends; i++) { 4786 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4787 bufA = bufa+sstartsj[i]; 4788 for (j=0; j<nrows; j++) { 4789 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4790 for (ll=0; ll<sbs; ll++) { 4791 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4792 for (l=0; l<ncols; l++) { 4793 *bufA++ = vals[l]; 4794 } 4795 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4796 } 4797 } 4798 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4799 } 4800 /* recvs and sends of a-array are completed */ 4801 i = nrecvs; 4802 while (i--) { 4803 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4804 } 4805 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4806 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4807 4808 if (scall == MAT_INITIAL_MATRIX) { 4809 /* put together the new matrix */ 4810 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4811 4812 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4813 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4814 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4815 b_oth->free_a = PETSC_TRUE; 4816 b_oth->free_ij = PETSC_TRUE; 4817 b_oth->nonew = 0; 4818 4819 ierr = PetscFree(bufj);CHKERRQ(ierr); 4820 if (!startsj_s || !bufa_ptr) { 4821 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4822 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4823 } else { 4824 *startsj_s = sstartsj; 4825 *startsj_r = rstartsj; 4826 *bufa_ptr = bufa; 4827 } 4828 } 4829 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4830 PetscFunctionReturn(0); 4831 } 4832 4833 #undef __FUNCT__ 4834 #define __FUNCT__ "MatGetCommunicationStructs" 4835 /*@C 4836 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4837 4838 Not Collective 4839 4840 Input Parameters: 4841 . A - The matrix in mpiaij format 4842 4843 Output Parameter: 4844 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4845 . colmap - A map from global column index to local index into lvec 4846 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4847 4848 Level: developer 4849 4850 @*/ 4851 #if defined(PETSC_USE_CTABLE) 4852 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4853 #else 4854 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4855 #endif 4856 { 4857 Mat_MPIAIJ *a; 4858 4859 PetscFunctionBegin; 4860 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4861 PetscValidPointer(lvec, 2); 4862 PetscValidPointer(colmap, 3); 4863 PetscValidPointer(multScatter, 4); 4864 a = (Mat_MPIAIJ*) A->data; 4865 if (lvec) *lvec = a->lvec; 4866 if (colmap) *colmap = a->colmap; 4867 if (multScatter) *multScatter = a->Mvctx; 4868 PetscFunctionReturn(0); 4869 } 4870 4871 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4872 PETSC_INTERN PetscErrorCode 
MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4873 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4874 #if defined(PETSC_HAVE_ELEMENTAL) 4875 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4876 #endif 4877 4878 #undef __FUNCT__ 4879 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4880 /* 4881 Computes (B'*A')' since computing B*A directly is untenable 4882 4883 n p p 4884 ( ) ( ) ( ) 4885 m ( A ) * n ( B ) = m ( C ) 4886 ( ) ( ) ( ) 4887 4888 */ 4889 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4890 { 4891 PetscErrorCode ierr; 4892 Mat At,Bt,Ct; 4893 4894 PetscFunctionBegin; 4895 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4896 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4897 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4898 ierr = MatDestroy(&At);CHKERRQ(ierr); 4899 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4900 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4901 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4902 PetscFunctionReturn(0); 4903 } 4904 4905 #undef __FUNCT__ 4906 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4907 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4908 { 4909 PetscErrorCode ierr; 4910 PetscInt m=A->rmap->n,n=B->cmap->n; 4911 Mat Cmat; 4912 4913 PetscFunctionBegin; 4914 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4915 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 4916 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4917 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 4918 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4919 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 4920 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4921 ierr = 
MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4922 4923 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 4924 4925 *C = Cmat; 4926 PetscFunctionReturn(0); 4927 } 4928 4929 /* ----------------------------------------------------------------*/ 4930 #undef __FUNCT__ 4931 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4932 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4933 { 4934 PetscErrorCode ierr; 4935 4936 PetscFunctionBegin; 4937 if (scall == MAT_INITIAL_MATRIX) { 4938 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4939 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4940 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4941 } 4942 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4943 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4944 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4945 PetscFunctionReturn(0); 4946 } 4947 4948 /*MC 4949 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4950 4951 Options Database Keys: 4952 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4953 4954 Level: beginner 4955 4956 .seealso: MatCreateAIJ() 4957 M*/ 4958 4959 #undef __FUNCT__ 4960 #define __FUNCT__ "MatCreate_MPIAIJ" 4961 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 4962 { 4963 Mat_MPIAIJ *b; 4964 PetscErrorCode ierr; 4965 PetscMPIInt size; 4966 4967 PetscFunctionBegin; 4968 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 4969 4970 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 4971 B->data = (void*)b; 4972 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4973 B->assembled = PETSC_FALSE; 4974 B->insertmode = NOT_SET_VALUES; 4975 b->size = size; 4976 4977 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 4978 4979 /* build cache for off array entries formed */ 4980 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 4981 4982 b->donotstash = PETSC_FALSE; 4983 b->colmap = 0; 4984 b->garray = 0; 4985 b->roworiented = PETSC_TRUE; 4986 4987 /* stuff used for matrix vector multiply */ 4988 b->lvec = NULL; 4989 b->Mvctx = NULL; 4990 4991 /* stuff for MatGetRow() */ 4992 b->rowindices = 0; 4993 b->rowvalues = 0; 4994 b->getrowactive = PETSC_FALSE; 4995 4996 /* flexible pointer used in CUSP/CUSPARSE classes */ 4997 b->spptr = NULL; 4998 4999 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5000 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5001 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5002 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5003 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5004 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5005 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5006 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5007 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5008 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5009 #if defined(PETSC_HAVE_ELEMENTAL) 5010 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5011 #endif 5012 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5013 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5014 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5015 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5016 PetscFunctionReturn(0); 5017 } 5018 5019 #undef __FUNCT__ 5020 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5021 /*@C 5022 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5023 and "off-diagonal" part of the matrix in CSR format. 5024 5025 Collective on MPI_Comm 5026 5027 Input Parameters: 5028 + comm - MPI communicator 5029 . m - number of local rows (Cannot be PETSC_DECIDE) 5030 . 
n - This value should be the same as the local size used in creating the 5031 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5032 calculated if N is given) For square matrices n is almost always m. 5033 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5034 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5035 . i - row indices for "diagonal" portion of matrix 5036 . j - column indices 5037 . a - matrix values 5038 . oi - row indices for "off-diagonal" portion of matrix 5039 . oj - column indices 5040 - oa - matrix values 5041 5042 Output Parameter: 5043 . mat - the matrix 5044 5045 Level: advanced 5046 5047 Notes: 5048 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5049 must free the arrays once the matrix has been destroyed and not before. 5050 5051 The i and j indices are 0 based 5052 5053 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5054 5055 This sets local rows and cannot be used to set off-processor values. 5056 5057 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5058 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5059 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5060 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5061 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5062 communication if it is known that only local entries will be set. 
5063 5064 .keywords: matrix, aij, compressed row, sparse, parallel 5065 5066 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5067 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5068 @*/ 5069 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5070 { 5071 PetscErrorCode ierr; 5072 Mat_MPIAIJ *maij; 5073 5074 PetscFunctionBegin; 5075 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5076 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5077 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5078 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5079 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5080 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5081 maij = (Mat_MPIAIJ*) (*mat)->data; 5082 5083 (*mat)->preallocated = PETSC_TRUE; 5084 5085 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5086 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5087 5088 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5089 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5090 5091 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5092 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5093 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5094 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5095 5096 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5097 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5098 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5099 PetscFunctionReturn(0); 
5100 } 5101 5102 /* 5103 Special version for direct calls from Fortran 5104 */ 5105 #include <petsc/private/fortranimpl.h> 5106 5107 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5108 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5109 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5110 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5111 #endif 5112 5113 /* Change these macros so can be used in void function */ 5114 #undef CHKERRQ 5115 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5116 #undef SETERRQ2 5117 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5118 #undef SETERRQ3 5119 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5120 #undef SETERRQ 5121 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5122 5123 #undef __FUNCT__ 5124 #define __FUNCT__ "matsetvaluesmpiaij_" 5125 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5126 { 5127 Mat mat = *mmat; 5128 PetscInt m = *mm, n = *mn; 5129 InsertMode addv = *maddv; 5130 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5131 PetscScalar value; 5132 PetscErrorCode ierr; 5133 5134 MatCheckPreallocated(mat,1); 5135 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5136 5137 #if defined(PETSC_USE_DEBUG) 5138 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5139 #endif 5140 { 5141 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5142 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5143 PetscBool roworiented = aij->roworiented; 5144 5145 /* Some Variables required in the macro */ 5146 Mat A = aij->A; 5147 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5148 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5149 MatScalar *aa = a->a; 5150 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? 
PETSC_TRUE : PETSC_FALSE); 5151 Mat B = aij->B; 5152 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5153 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5154 MatScalar *ba = b->a; 5155 5156 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5157 PetscInt nonew = a->nonew; 5158 MatScalar *ap1,*ap2; 5159 5160 PetscFunctionBegin; 5161 for (i=0; i<m; i++) { 5162 if (im[i] < 0) continue; 5163 #if defined(PETSC_USE_DEBUG) 5164 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5165 #endif 5166 if (im[i] >= rstart && im[i] < rend) { 5167 row = im[i] - rstart; 5168 lastcol1 = -1; 5169 rp1 = aj + ai[row]; 5170 ap1 = aa + ai[row]; 5171 rmax1 = aimax[row]; 5172 nrow1 = ailen[row]; 5173 low1 = 0; 5174 high1 = nrow1; 5175 lastcol2 = -1; 5176 rp2 = bj + bi[row]; 5177 ap2 = ba + bi[row]; 5178 rmax2 = bimax[row]; 5179 nrow2 = bilen[row]; 5180 low2 = 0; 5181 high2 = nrow2; 5182 5183 for (j=0; j<n; j++) { 5184 if (roworiented) value = v[i*n+j]; 5185 else value = v[i+j*m]; 5186 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5187 if (in[j] >= cstart && in[j] < cend) { 5188 col = in[j] - cstart; 5189 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5190 } else if (in[j] < 0) continue; 5191 #if defined(PETSC_USE_DEBUG) 5192 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5193 #endif 5194 else { 5195 if (mat->was_assembled) { 5196 if (!aij->colmap) { 5197 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5198 } 5199 #if defined(PETSC_USE_CTABLE) 5200 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5201 col--; 5202 #else 5203 col = aij->colmap[in[j]] - 1; 5204 #endif 5205 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5206 ierr = 
MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5207 col = in[j]; 5208 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5209 B = aij->B; 5210 b = (Mat_SeqAIJ*)B->data; 5211 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5212 rp2 = bj + bi[row]; 5213 ap2 = ba + bi[row]; 5214 rmax2 = bimax[row]; 5215 nrow2 = bilen[row]; 5216 low2 = 0; 5217 high2 = nrow2; 5218 bm = aij->B->rmap->n; 5219 ba = b->a; 5220 } 5221 } else col = in[j]; 5222 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5223 } 5224 } 5225 } else if (!aij->donotstash) { 5226 if (roworiented) { 5227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5228 } else { 5229 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5230 } 5231 } 5232 } 5233 } 5234 PetscFunctionReturnVoid(); 5235 } 5236 5237