1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/isimpl.h> 6 #include <petscblaslapack.h> 7 #include <petscsf.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 22 enough exist. 23 24 Level: beginner 25 26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 27 M*/ 28 29 /*MC 30 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 31 32 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 33 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 34 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 35 for communicators controlling multiple processes. It is recommended that you call both of 36 the above preallocation routines for simplicity. 37 38 Options Database Keys: 39 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSizes_MPIAIJ"
/*
   Sets the row/column block sizes on both the diagonal (A) and off-diagonal (B)
   sequential pieces of the parallel matrix.  B always gets a column block size
   of 1 because its columns index the (unblocked) off-process part.
*/
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) { /* the local pieces exist only after preallocation/setup */
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
/*
   Builds an index set of the locally owned rows that contain at least one
   stored, numerically nonzero entry (in either the diagonal or off-diagonal
   part).  If no process has an entirely-zero row, *keptrows is left NULL,
   meaning "keep all rows".  Collective: every rank participates in the
   Allreduce, so all ranks must call this together.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  /* first pass: cnt = number of local rows with no (numerical) nonzeros */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) { /* no stored entries at all */
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  /* n0rows = global count of zero rows; if zero, NULL output means keep everything */
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  /* second pass: record the global indices of the rows to keep */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  /* the IS takes ownership of rows[] (PETSC_OWN_POINTER) */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
/*
   Sets/adds the entries of D onto the diagonal of Y.  When the row and column
   ownership ranges coincide (square, congruent layout) the whole diagonal
   lives in the local diagonal block and we can delegate to it; otherwise
   fall back to the generic implementation.
*/
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
/*
   Returns the global indices of locally owned rows whose diagonal entry is
   missing or zero.  The diagonal entries all live in the local block aij->A,
   so the sequential helper does the real work and we only shift to global
   numbering.  The IS takes ownership of the rows array.
*/
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart; /* local -> global row numbers */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
/*
   Computes the 1-, 2-, or infinity-norm of every column of A into norms[]
   (global length N array on every process).  Each rank accumulates the
   contributions of its stored entries into a work array indexed by global
   column (A-part columns are offset by cstart; B-part columns go through
   garray), then a SUM (or MAX for infinity) Allreduce combines them.
   NOTE: not scalable in memory - uses a dense work array of the global
   number of columns on every rank.
*/
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij    = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij  = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij  = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); /* zeroed accumulation buffer */
  if (type == NORM_2) {
    /* accumulate |a|^2 = |a*a|; square roots are taken after the reduction */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
/*
   Builds an IS of the (global) local row indices that have entries outside
   the diagonal block: the union of the local block's off-block-diagonal rows
   (sis) and the rows with any off-process entry (gis, nonzero rows of B).
   The two local-index lists are merged, sorted/deduplicated, then shifted
   to global numbering; the IS takes ownership of the merged array.
*/
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ      *a = (Mat_MPIAIJ*)A->data;
  IS              sis,gis;
  PetscErrorCode  ierr;
  const PetscInt  *isis,*igis;
  PetscInt        n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); /* n becomes the unique count */
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart; /* local -> global row numbers */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN

    m is the number of locally owned rows on this rank; rank 0 holds the full
    sequential matrix gmat and ships row lengths, column indices, and values
    to the other ranks with matched point-to-point sends/receives.  With
    MAT_REUSE_MATRIX only the numerical values are redistributed into *inmat.
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); /* rank 0's block sizes win */
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    /* prefix-sum the local row counts into ownership offsets */
    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); /* ld[i] = entries strictly left of the diagonal block */
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); /* ld[i] = entries strictly left of the diagonal block */
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens temporarily holds only the diagonal-block counts */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to the full row lengths for the insertion loop below */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) { /* rank 0 used gmat's own arrays; only receivers allocated buffers */
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    /* stash ld for the MAT_REUSE_MATRIX path, which needs the left-of-diagonal split */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat;
       each row arrives as [left-of-diagonal B part | A part | right-of-diagonal B part],
       with ld[] giving the left-of-diagonal length recorded at creation */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                             ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];             ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right part of previous row plus left part of this row */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* trailing right-of-diagonal part of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr); /* gmataa was advanced; free the saved base pointer */
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.
 When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
/*
   Builds aij->colmap: global column number -> 1-based local column in B
   (0 meaning "not present locally").  Requires garray, which exists once
   the matrix has been assembled.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash table keyed by global column + 1 (keys/values are shifted by one so 0 can mean "absent") */
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* dense array of the global number of columns; zeroed so missing columns map to 0 */
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/*
   Inserts one (row,col,value) into the diagonal block A.  Expects the caller
   (MatSetValues_MPIAIJ) to have the working variables rp1/ap1/nrow1/low1/high1/
   lastcol1/rmax1/aimax/ai/aj/aa/ailen/nonew/ignorezeroentries in scope.
   Binary-search-assisted insertion; falls through to MatSeqXAIJReallocateAIJ
   when the row is full and new nonzeros are allowed.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1)  low1 = 0; \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}


/*
   Same as MatSetValues_SeqAIJ_A_Private but for the off-diagonal block B,
   using the parallel working set rp2/ap2/nrow2/low2/high2/lastcol2/rmax2/
   bimax/bi/bj/ba/bilen.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Overwrites the values of one locally owned row from a caller-supplied
   array v that holds the full row in global column order:
   [B entries left of the diagonal block | A entries | B entries to the right].
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag; /* global -> local row number */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break; /* l = number of B entries left of the diagonal block */
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   Inserts/adds an m x n logically dense block of values.  Locally owned rows
   are routed to the diagonal (A) or off-diagonal (B) sequential block via the
   MatSetValues_SeqAIJ_{A,B}_Private macros; off-process rows are stashed for
   communication during assembly.  The macros consume the working variables
   declared below (rp1/ap1/..., rp2/ap2/...), which is why they are re-seeded
   after a possible matrix disassembly.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: prime the macro search state for both blocks */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue; /* negative columns are silently ignored */
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--; /* colmap stores 1-based local columns; 0 means "not present" */
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* new off-process column: must disassemble B back to global numbering */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j]; /* not yet assembled: B still uses global column numbers */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      /* off-process row: stash for communication during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   Retrieves an m x n block of values.  Only locally owned rows are supported;
   off-diagonal columns are translated through colmap, and entries not stored
   locally are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* the garray cross-check guards against stale colmap entries after disassembly */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
/*
   Starts communication of stashed off-process entries.  A no-op when
   stashing is disabled or off-process entries were promised not to occur.
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the stash into local blocks, assembles A and B,
   coordinates disassembly across ranks if any rank added new off-process
   nonzeros, and (on first final assembly) builds the parallel multiply
   machinery.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;
      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of was_assembled flags: any rank that disassembled makes the product false */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); /* builds lvec/Mvctx/garray for parallel multiply */
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* drop any cached row workspace from MatGetRow */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); /* cached diagonal is stale after assembly */
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
/*
   Zeros all stored values (nonzero structure is preserved) in both the
   diagonal and off-diagonal blocks.
*/
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   Zeros the given global rows (any rank may name any row), optionally
   placing diag on the diagonal and fixing up b = diag*x for those rows.
   The off-diagonal block is zeroed first because inserting the diagonal
   value below can add entries to it.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt      *lrows;
  PetscInt       r, len;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
    PetscBool cong;
    ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
    if (cong) A->congruentlayouts = 1;
    else      A->congruentlayouts = 0;
  }
  if ((diag != 0.0) && A->congruentlayouts) {
    /* square & congruent: the diagonal lives entirely in mat->A */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* non-congruent layouts: insert diagonal entries via MatSetValues, which may create new nonzeros */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
/*
   Zeros the given global rows AND the matching columns, optionally placing
   diag on the diagonal and updating b for the eliminated columns
   (b_i -= a_ij * x_j).  Row ownership is resolved with a PetscSF reduction;
   the column elimination in the off-diagonal block uses a scattered 0/1
   mask vector in the ghost (lvec) layout.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "row not selected" */
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed: LOR with nonnegative indices flips -1 to >= 0 */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1; /* 1 marks an eliminated column */
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* ghost values of x are needed to update b for eliminated off-process columns */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i]; /* n is reused here as the row length */
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx
(%D)",A->cmap->n,nt); 978 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 979 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 980 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 981 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 982 PetscFunctionReturn(0); 983 } 984 985 #undef __FUNCT__ 986 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 987 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 988 { 989 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 990 PetscErrorCode ierr; 991 992 PetscFunctionBegin; 993 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 994 PetscFunctionReturn(0); 995 } 996 997 #undef __FUNCT__ 998 #define __FUNCT__ "MatMultAdd_MPIAIJ" 999 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1000 { 1001 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1002 PetscErrorCode ierr; 1003 1004 PetscFunctionBegin; 1005 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1007 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1008 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1014 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1015 { 1016 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1017 PetscErrorCode ierr; 1018 PetscBool merged; 1019 1020 PetscFunctionBegin; 1021 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1022 /* do nondiagonal part */ 1023 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1024 if (!merged) { 1025 /* send it on its way */ 1026 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1027 /* do local part */ 1028 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1029 /* receive remote parts: 
note this assumes the values are not actually */ 1030 /* added in yy until the next line, */ 1031 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1032 } else { 1033 /* do local part */ 1034 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1035 /* send it on its way */ 1036 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1037 /* values actually were received in the Begin() but we need to call this nop */ 1038 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1039 } 1040 PetscFunctionReturn(0); 1041 } 1042 1043 #undef __FUNCT__ 1044 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1045 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1046 { 1047 MPI_Comm comm; 1048 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1049 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1050 IS Me,Notme; 1051 PetscErrorCode ierr; 1052 PetscInt M,N,first,last,*notme,i; 1053 PetscMPIInt size; 1054 1055 PetscFunctionBegin; 1056 /* Easy test: symmetric diagonal block */ 1057 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1058 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1059 if (!*f) PetscFunctionReturn(0); 1060 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1061 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1062 if (size == 1) PetscFunctionReturn(0); 1063 1064 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
*/ 1065 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1066 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1067 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1068 for (i=0; i<first; i++) notme[i] = i; 1069 for (i=last; i<M; i++) notme[i-last+first] = i; 1070 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1071 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1072 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1073 Aoff = Aoffs[0]; 1074 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1075 Boff = Boffs[0]; 1076 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1077 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1078 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1079 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1080 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1081 ierr = PetscFree(notme);CHKERRQ(ierr); 1082 PetscFunctionReturn(0); 1083 } 1084 1085 #undef __FUNCT__ 1086 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1087 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1088 { 1089 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1090 PetscErrorCode ierr; 1091 1092 PetscFunctionBegin; 1093 /* do nondiagonal part */ 1094 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1095 /* send it on its way */ 1096 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1097 /* do local part */ 1098 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1099 /* receive remote parts */ 1100 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1101 PetscFunctionReturn(0); 1102 } 1103 1104 /* 1105 This only works correctly for square matrices where the subblock A->A is the 1106 diagonal block 1107 */ 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1110 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat 
A,Vec v) 1111 { 1112 PetscErrorCode ierr; 1113 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1114 1115 PetscFunctionBegin; 1116 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1117 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1118 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatScale_MPIAIJ" 1124 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1125 { 1126 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1131 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "MatDestroy_MPIAIJ" 1137 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1138 { 1139 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1140 PetscErrorCode ierr; 1141 1142 PetscFunctionBegin; 1143 #if defined(PETSC_USE_LOG) 1144 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1145 #endif 1146 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1147 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1148 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1149 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1150 #if defined(PETSC_USE_CTABLE) 1151 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1152 #else 1153 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1154 #endif 1155 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1156 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1157 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1158 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1159 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1160 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1161 1162 ierr = 
PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1163 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1164 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1165 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1168 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1169 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1170 #if defined(PETSC_HAVE_ELEMENTAL) 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1172 #endif 1173 #if defined(PETSC_HAVE_HYPRE) 1174 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1175 #endif 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1181 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1182 { 1183 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1184 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1185 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1186 PetscErrorCode ierr; 1187 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1188 int fd; 1189 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1190 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1191 PetscScalar *column_values; 1192 PetscInt message_count,flowcontrolcount; 1193 FILE *file; 1194 1195 PetscFunctionBegin; 1196 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1197 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1198 nz = A->nz + B->nz; 
1199 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1200 if (!rank) { 1201 header[0] = MAT_FILE_CLASSID; 1202 header[1] = mat->rmap->N; 1203 header[2] = mat->cmap->N; 1204 1205 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1206 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1207 /* get largest number of rows any processor has */ 1208 rlen = mat->rmap->n; 1209 range = mat->rmap->range; 1210 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1211 } else { 1212 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1213 rlen = mat->rmap->n; 1214 } 1215 1216 /* load up the local row counts */ 1217 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1218 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1219 1220 /* store the row lengths to the file */ 1221 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1222 if (!rank) { 1223 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1224 for (i=1; i<size; i++) { 1225 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1226 rlen = range[i+1] - range[i]; 1227 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1228 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1229 } 1230 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1231 } else { 1232 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1233 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1234 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1235 } 1236 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1237 1238 /* load up 
the local column indices */ 1239 nzmax = nz; /* th processor needs space a largest processor needs */ 1240 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1241 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1242 cnt = 0; 1243 for (i=0; i<mat->rmap->n; i++) { 1244 for (j=B->i[i]; j<B->i[i+1]; j++) { 1245 if ((col = garray[B->j[j]]) > cstart) break; 1246 column_indices[cnt++] = col; 1247 } 1248 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1249 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1250 } 1251 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1252 1253 /* store the column indices to the file */ 1254 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1255 if (!rank) { 1256 MPI_Status status; 1257 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1258 for (i=1; i<size; i++) { 1259 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1260 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1261 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1262 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1263 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1264 } 1265 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1266 } else { 1267 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1268 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1269 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1270 ierr = 
PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1271 } 1272 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1273 1274 /* load up the local column values */ 1275 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1276 cnt = 0; 1277 for (i=0; i<mat->rmap->n; i++) { 1278 for (j=B->i[i]; j<B->i[i+1]; j++) { 1279 if (garray[B->j[j]] > cstart) break; 1280 column_values[cnt++] = B->a[j]; 1281 } 1282 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1283 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1284 } 1285 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1286 1287 /* store the column values to the file */ 1288 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1289 if (!rank) { 1290 MPI_Status status; 1291 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1292 for (i=1; i<size; i++) { 1293 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1294 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1295 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1296 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1297 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1298 } 1299 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1300 } else { 1301 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1302 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1303 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1304 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 
1305 } 1306 ierr = PetscFree(column_values);CHKERRQ(ierr); 1307 1308 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1309 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1310 PetscFunctionReturn(0); 1311 } 1312 1313 #include <petscdraw.h> 1314 #undef __FUNCT__ 1315 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1316 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1317 { 1318 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1319 PetscErrorCode ierr; 1320 PetscMPIInt rank = aij->rank,size = aij->size; 1321 PetscBool isdraw,iascii,isbinary; 1322 PetscViewer sviewer; 1323 PetscViewerFormat format; 1324 1325 PetscFunctionBegin; 1326 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1327 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1328 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1329 if (iascii) { 1330 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1331 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1332 MatInfo info; 1333 PetscBool inodes; 1334 1335 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1336 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1337 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1338 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1339 if (!inodes) { 1340 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1341 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1342 } else { 1343 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1344 
rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1345 } 1346 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1347 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1348 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1349 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1350 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1351 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1352 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1353 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1354 PetscFunctionReturn(0); 1355 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1356 PetscInt inodecount,inodelimit,*inodes; 1357 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1358 if (inodes) { 1359 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1360 } else { 1361 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1362 } 1363 PetscFunctionReturn(0); 1364 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1365 PetscFunctionReturn(0); 1366 } 1367 } else if (isbinary) { 1368 if (size == 1) { 1369 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1370 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1371 } else { 1372 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1373 } 1374 PetscFunctionReturn(0); 1375 } else if (isdraw) { 1376 PetscDraw draw; 1377 PetscBool isnull; 1378 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1379 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1380 if (isnull) PetscFunctionReturn(0); 1381 } 1382 1383 { 
1384 /* assemble the entire matrix onto first processor. */ 1385 Mat A; 1386 Mat_SeqAIJ *Aloc; 1387 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1388 MatScalar *a; 1389 1390 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1391 if (!rank) { 1392 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1393 } else { 1394 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1395 } 1396 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1397 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1398 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1399 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1400 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1401 1402 /* copy over the A part */ 1403 Aloc = (Mat_SeqAIJ*)aij->A->data; 1404 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1405 row = mat->rmap->rstart; 1406 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1407 for (i=0; i<m; i++) { 1408 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1409 row++; 1410 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1411 } 1412 aj = Aloc->j; 1413 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1414 1415 /* copy over the B part */ 1416 Aloc = (Mat_SeqAIJ*)aij->B->data; 1417 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1418 row = mat->rmap->rstart; 1419 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1420 ct = cols; 1421 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1422 for (i=0; i<m; i++) { 1423 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1424 row++; 1425 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1426 } 1427 ierr = PetscFree(ct);CHKERRQ(ierr); 1428 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1429 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1430 /* 1431 Everyone has to call to draw the matrix since the graphics waits 
are 1432 synchronized across all processors that share the PetscDraw object 1433 */ 1434 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1435 if (!rank) { 1436 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1437 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1438 } 1439 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1440 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1441 ierr = MatDestroy(&A);CHKERRQ(ierr); 1442 } 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatView_MPIAIJ" 1448 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1449 { 1450 PetscErrorCode ierr; 1451 PetscBool iascii,isdraw,issocket,isbinary; 1452 1453 PetscFunctionBegin; 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1457 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1458 if (iascii || isdraw || isbinary || issocket) { 1459 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1460 } 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #undef __FUNCT__ 1465 #define __FUNCT__ "MatSOR_MPIAIJ" 1466 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1467 { 1468 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1469 PetscErrorCode ierr; 1470 Vec bb1 = 0; 1471 PetscBool hasop; 1472 1473 PetscFunctionBegin; 1474 if (flag == SOR_APPLY_UPPER) { 1475 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1476 PetscFunctionReturn(0); 1477 } 1478 1479 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & 
SOR_EISENSTAT) { 1480 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1481 } 1482 1483 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1484 if (flag & SOR_ZERO_INITIAL_GUESS) { 1485 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1486 its--; 1487 } 1488 1489 while (its--) { 1490 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1491 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1492 1493 /* update rhs: bb1 = bb - B*x */ 1494 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1495 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1496 1497 /* local sweep */ 1498 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1499 } 1500 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1501 if (flag & SOR_ZERO_INITIAL_GUESS) { 1502 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1503 its--; 1504 } 1505 while (its--) { 1506 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1507 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1508 1509 /* update rhs: bb1 = bb - B*x */ 1510 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1511 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1512 1513 /* local sweep */ 1514 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1515 } 1516 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1517 if (flag & SOR_ZERO_INITIAL_GUESS) { 1518 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1519 its--; 1520 } 1521 while (its--) { 1522 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1523 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1524 1525 /* update rhs: bb1 = bb - B*x 
*/
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    /* communicate the ghost values of xx needed by the off-diagonal block */
    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* lazily create and cache the diagonal of the matrix */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
/*
   MatPermute_MPIAIJ - Builds B = the row/column permutation of A given by the
   (possibly parallel) index sets rowp and colp.  Star forests (PetscSF) are
   used to invert the permutations and to translate the compressed off-diagonal
   (ghost) column indices to their permuted global positions.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL; /* NOTE(review): never assigned below, so the ISDestroy at the end is dead code — confirm intent */
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* Count diagonal/off-diagonal nonzeros per (unpermuted) local row for preallocation */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the counts to the processes that own the permuted rows */
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetGhosts_MPIAIJ"
/*
   MatGetGhosts_MPIAIJ - Returns the number of ghost (off-process) columns and,
   optionally, a borrowed pointer to their global indices (the garray of the
   off-diagonal block B); the caller must not free it.
*/
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
/*
   MatGetInfo_MPIAIJ - Sums the statistics of the diagonal (A) and off-diagonal
   (B) blocks, then reduces over the communicator (sum or max) as requested.
*/
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  /* stash the A-block statistics, then accumulate the B-block's on top */
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
/*
   MatSetOption_MPIAIJ - Dispatches a matrix option, forwarding it to the
   diagonal and/or off-diagonal sequential blocks where appropriate and
   recording communicator-level flags (stashing, SPD, ...) on the parallel
   matrix itself.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options that apply to both local blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      /* SPD implies (structural) symmetry */
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  /* symmetry-type options only affect the (square) diagonal block */
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns (a copy of) one locally owned row, merging the
   diagonal and off-diagonal blocks into a single list sorted by global column.
   Must be paired with MatRestoreRow_MPIAIJ.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart =
matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* only request from the blocks what the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column lies left of the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
/* MatRestoreRow_MPIAIJ - Releases the row obtained with MatGetRow_MPIAIJ. */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column sum) or infinity-
   (max row sum) norm by combining the two local blocks and reducing across
   the communicator.  The 2-norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
      ierr  = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
    } else if (type == NORM_1) { /* max column norm */
      /* NOTE(review): allocates a full global-width work array (cmap->N) per process — not memory scalable */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
      ierr =
PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* off-diagonal column indices are compressed; map through garray */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the (parallel) transpose of A.  For
   MAT_INITIAL_MATRIX (or in-place reuse) the target matrix is created and
   preallocated by counting column occurrences with a PetscSF reduction;
   otherwise the existing *matout is filled.  Note: aj is temporarily shifted
   to global indices and restored afterwards.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data,*Bloc = (Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
  PetscInt       cstart = A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0;
i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    /* note the swapped row/column arguments: we insert a row of A as a column of B */
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's innards with B's */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes diag(ll) * mat * diag(rr).  The scatter
   of rr into the ghost vector is overlapped with the left scaling and the
   scaling of the diagonal block.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
/* MatSetUnfactored_MPIAIJ - Resets the factored state of the diagonal block. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
/*
   MatEqual_MPIAIJ - Compares A and B blockwise on each process, then takes a
   logical AND across the communicator so all ranks get the same answer.
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
/*
   MatCopy_MPIAIJ - Copies A into B; uses the fast blockwise copy only when
   the nonzero patterns match and both matrices share the copy implementation.
*/
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices
don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIAIJ"
/* MatSetUp_MPIAIJ - Default setup: preallocate with default row lengths. */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
#undef __FUNCT__
#define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
/*
   MatAXPYGetPreallocation_MPIX_private - For each of the m rows, counts the
   size of the union of the (globally indexed) column sets of X and Y by a
   merge of the two sorted index lists; xltog/yltog map local to global columns.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx    = xi[i+1] - xi[i];
    nzy    = yi[i+1] - yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                                   /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
#undef __FUNCT__
#define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       m = Y->rmap->N; /* Y is a sequential block here, so global row count == local */
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.  With SAME_NONZERO_PATTERN this is a
   direct BLAS axpy on the value arrays of both blocks; otherwise a new matrix
   with the union sparsity pattern is preallocated and Y is header-replaced.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr =
PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    /* different patterns: build a new matrix with the union pattern and swap it into Y */
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
/* MatConjugate_MPIAIJ - Complex-conjugates both blocks; no-op for real scalars. */
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
/* MatRealPart_MPIAIJ - Keeps only the real part of both local blocks. */
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
/* MatImaginaryPart_MPIAIJ - Keeps only the imaginary part of both local blocks. */
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
/*
   MatGetRowMaxAbs_MPIAIJ - For each local row, returns the entry of maximum
   absolute value (and optionally its global column index) by comparing the
   per-row maxima of the diagonal and off-diagonal blocks.
*/
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    /* shift the diagonal block's local column indices to global */
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]]; /* garray maps compressed off-diag columns to global */
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr =
VecDestroy(&vtmp);CHKERRQ(ierr); 2322 PetscFunctionReturn(0); 2323 } 2324 2325 #undef __FUNCT__ 2326 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2327 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2328 { 2329 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2330 PetscErrorCode ierr; 2331 PetscInt i,*idxb = 0; 2332 PetscScalar *va,*vb; 2333 Vec vtmp; 2334 2335 PetscFunctionBegin; 2336 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2337 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2338 if (idx) { 2339 for (i=0; i<A->cmap->n; i++) { 2340 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2341 } 2342 } 2343 2344 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2345 if (idx) { 2346 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2347 } 2348 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2349 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2350 2351 for (i=0; i<A->rmap->n; i++) { 2352 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2353 va[i] = vb[i]; 2354 if (idx) idx[i] = a->garray[idxb[i]]; 2355 } 2356 } 2357 2358 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2359 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2360 ierr = PetscFree(idxb);CHKERRQ(ierr); 2361 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2362 PetscFunctionReturn(0); 2363 } 2364 2365 #undef __FUNCT__ 2366 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2367 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2368 { 2369 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2370 PetscInt n = A->rmap->n; 2371 PetscInt cstart = A->cmap->rstart; 2372 PetscInt *cmap = mat->garray; 2373 PetscInt *diagIdx, *offdiagIdx; 2374 Vec diagV, offdiagV; 2375 PetscScalar *a, *diagA, *offdiagA; 2376 PetscInt r; 2377 PetscErrorCode ierr; 2378 2379 PetscFunctionBegin; 2380 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2381 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2382 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, 
&offdiagV);CHKERRQ(ierr); 2383 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2384 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2385 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2386 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2387 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2388 for (r = 0; r < n; ++r) { 2389 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2390 a[r] = diagA[r]; 2391 idx[r] = cstart + diagIdx[r]; 2392 } else { 2393 a[r] = offdiagA[r]; 2394 idx[r] = cmap[offdiagIdx[r]]; 2395 } 2396 } 2397 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2398 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2399 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2400 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2401 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2402 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2403 PetscFunctionReturn(0); 2404 } 2405 2406 #undef __FUNCT__ 2407 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2408 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2409 { 2410 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2411 PetscInt n = A->rmap->n; 2412 PetscInt cstart = A->cmap->rstart; 2413 PetscInt *cmap = mat->garray; 2414 PetscInt *diagIdx, *offdiagIdx; 2415 Vec diagV, offdiagV; 2416 PetscScalar *a, *diagA, *offdiagA; 2417 PetscInt r; 2418 PetscErrorCode ierr; 2419 2420 PetscFunctionBegin; 2421 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2422 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2423 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2424 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2425 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2426 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2427 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2428 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2429 for (r = 0; r < n; ++r) { 2430 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 
a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
/*
   MatGetSeqNonzeroStructure_MPIAIJ - Gathers the global nonzero structure
   (no values) onto each process as a sequential matrix.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
/*
   MatInvertBlockDiagonal_MPIAIJ - Delegates block-diagonal inversion to the
   diagonal block and propagates any factorization error state.
*/
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIAIJ"
/* MatSetRandom_MPIAIJ - Fills both local blocks with random values and reassembles. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
/* Implementation hook: switches the increaseoverlap operation between the
   scalable and default algorithms. */
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+    A - the matrix
-    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetFromOptions_MPIAIJ"
/* MatSetFromOptions_MPIAIJ - Processes MPIAIJ-specific options
   (-mat_increase_overlap_scalable). */
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode ierr;
  PetscBool      sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  ierr = PetscObjectOptionsBegin((PetscObject)A);
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatShift_MPIAIJ"
/*
   MatShift_MPIAIJ - Computes Y = Y + a*I, preallocating the diagonal first
   when Y is unassembled or its diagonal block has no nonzeros yet.
*/
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
PetscErrorCode ierr; 2546 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2547 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2548 2549 PetscFunctionBegin; 2550 if (!Y->preallocated) { 2551 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2552 } else if (!aij->nz) { 2553 PetscInt nonew = aij->nonew; 2554 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2555 aij->nonew = nonew; 2556 } 2557 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ" 2563 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2564 { 2565 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2566 PetscErrorCode ierr; 2567 2568 PetscFunctionBegin; 2569 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2570 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2571 if (d) { 2572 PetscInt rstart; 2573 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2574 *d += rstart; 2575 2576 } 2577 PetscFunctionReturn(0); 2578 } 2579 2580 2581 /* -------------------------------------------------------------------*/ 2582 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2583 MatGetRow_MPIAIJ, 2584 MatRestoreRow_MPIAIJ, 2585 MatMult_MPIAIJ, 2586 /* 4*/ MatMultAdd_MPIAIJ, 2587 MatMultTranspose_MPIAIJ, 2588 MatMultTransposeAdd_MPIAIJ, 2589 0, 2590 0, 2591 0, 2592 /*10*/ 0, 2593 0, 2594 0, 2595 MatSOR_MPIAIJ, 2596 MatTranspose_MPIAIJ, 2597 /*15*/ MatGetInfo_MPIAIJ, 2598 MatEqual_MPIAIJ, 2599 MatGetDiagonal_MPIAIJ, 2600 MatDiagonalScale_MPIAIJ, 2601 MatNorm_MPIAIJ, 2602 /*20*/ MatAssemblyBegin_MPIAIJ, 2603 MatAssemblyEnd_MPIAIJ, 2604 MatSetOption_MPIAIJ, 2605 MatZeroEntries_MPIAIJ, 2606 /*24*/ MatZeroRows_MPIAIJ, 2607 0, 2608 0, 2609 0, 2610 0, 2611 /*29*/ MatSetUp_MPIAIJ, 2612 0, 2613 0, 2614 MatGetDiagonalBlock_MPIAIJ, 2615 0, 2616 /*34*/ MatDuplicate_MPIAIJ, 2617 0, 2618 0, 2619 0, 2620 0, 2621 /*39*/ 
MatAXPY_MPIAIJ,
                                       MatGetSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatGetSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       MatGetGhosts_MPIAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatGetSubMatricesMPI_MPIAIJ,
                                /*129*/0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                /*134*/0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
};
/* NOTE: the table above is a POSITIONAL initializer for struct _MatOps; the slot
   numbers in the comments index into MatOps as declared in petsc/private/matimpl.h.
   A 0 entry means "operation not supported by MATMPIAIJ". Never reorder entries. */

/* ----------------------------------------------------------------------------------------*/

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/* Save the numerical values of both sequential blocks so they can be restored later. */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/* Restore the numerical values previously saved with MatStoreValues_MPIAIJ(). */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
/* Preallocate the diagonal (A) and off-diagonal (B) sequential blocks of an MPIAIJ matrix. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

  /* discard any previously built column map / communication structures */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr =
MatDestroy(&b->B);CHKERRQ(ierr); 2780 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2781 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2782 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2783 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2784 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2785 2786 if (!B->preallocated) { 2787 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2788 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2789 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2790 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2791 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2792 } 2793 2794 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2795 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2796 B->preallocated = PETSC_TRUE; 2797 B->was_assembled = PETSC_FALSE; 2798 B->assembled = PETSC_FALSE;; 2799 PetscFunctionReturn(0); 2800 } 2801 2802 #undef __FUNCT__ 2803 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2804 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2805 { 2806 Mat mat; 2807 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2808 PetscErrorCode ierr; 2809 2810 PetscFunctionBegin; 2811 *newmat = 0; 2812 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2813 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2814 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2815 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2816 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2817 a = (Mat_MPIAIJ*)mat->data; 2818 2819 mat->factortype = matin->factortype; 2820 mat->assembled = PETSC_TRUE; 2821 mat->insertmode = NOT_SET_VALUES; 2822 mat->preallocated = PETSC_TRUE; 2823 2824 a->size = 
oldmat->size; 2825 a->rank = oldmat->rank; 2826 a->donotstash = oldmat->donotstash; 2827 a->roworiented = oldmat->roworiented; 2828 a->rowindices = 0; 2829 a->rowvalues = 0; 2830 a->getrowactive = PETSC_FALSE; 2831 2832 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2833 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2834 2835 if (oldmat->colmap) { 2836 #if defined(PETSC_USE_CTABLE) 2837 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2838 #else 2839 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2840 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2841 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2842 #endif 2843 } else a->colmap = 0; 2844 if (oldmat->garray) { 2845 PetscInt len; 2846 len = oldmat->B->cmap->n; 2847 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2848 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2849 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2850 } else a->garray = 0; 2851 2852 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2853 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2854 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2855 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2856 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2857 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2858 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2859 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2860 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2861 *newmat = mat; 2862 PetscFunctionReturn(0); 2863 } 2864 2865 2866 2867 #undef __FUNCT__ 2868 #define __FUNCT__ 
"MatLoad_MPIAIJ" 2869 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2870 { 2871 PetscScalar *vals,*svals; 2872 MPI_Comm comm; 2873 PetscErrorCode ierr; 2874 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2875 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2876 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2877 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2878 PetscInt cend,cstart,n,*rowners; 2879 int fd; 2880 PetscInt bs = newMat->rmap->bs; 2881 2882 PetscFunctionBegin; 2883 /* force binary viewer to load .info file if it has not yet done so */ 2884 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2885 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2886 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2887 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2888 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2889 if (!rank) { 2890 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2891 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2892 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2893 } 2894 2895 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2896 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2897 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2898 if (bs < 0) bs = 1; 2899 2900 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2901 M = header[1]; N = header[2]; 2902 2903 /* If global sizes are set, check if they are consistent with that given in the file */ 2904 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 
2905 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2906 2907 /* determine ownership of all (block) rows */ 2908 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2909 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2910 else m = newMat->rmap->n; /* Set by user */ 2911 2912 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2913 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2914 2915 /* First process needs enough room for process with most rows */ 2916 if (!rank) { 2917 mmax = rowners[1]; 2918 for (i=2; i<=size; i++) { 2919 mmax = PetscMax(mmax, rowners[i]); 2920 } 2921 } else mmax = -1; /* unused, but compilers complain */ 2922 2923 rowners[0] = 0; 2924 for (i=2; i<=size; i++) { 2925 rowners[i] += rowners[i-1]; 2926 } 2927 rstart = rowners[rank]; 2928 rend = rowners[rank+1]; 2929 2930 /* distribute row lengths to all processors */ 2931 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2932 if (!rank) { 2933 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2934 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2935 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2936 for (j=0; j<m; j++) { 2937 procsnz[0] += ourlens[j]; 2938 } 2939 for (i=1; i<size; i++) { 2940 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2941 /* calculate the number of nonzeros on each processor */ 2942 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2943 procsnz[i] += rowlengths[j]; 2944 } 2945 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2946 } 2947 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2948 } else { 2949 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2950 } 2951 2952 if 
(!rank) { 2953 /* determine max buffer needed and allocate it */ 2954 maxnz = 0; 2955 for (i=0; i<size; i++) { 2956 maxnz = PetscMax(maxnz,procsnz[i]); 2957 } 2958 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2959 2960 /* read in my part of the matrix column indices */ 2961 nz = procsnz[0]; 2962 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2963 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2964 2965 /* read in every one elses and ship off */ 2966 for (i=1; i<size; i++) { 2967 nz = procsnz[i]; 2968 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2969 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2970 } 2971 ierr = PetscFree(cols);CHKERRQ(ierr); 2972 } else { 2973 /* determine buffer space needed for message */ 2974 nz = 0; 2975 for (i=0; i<m; i++) { 2976 nz += ourlens[i]; 2977 } 2978 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2979 2980 /* receive message of column indices*/ 2981 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2982 } 2983 2984 /* determine column ownership if matrix is not square */ 2985 if (N != M) { 2986 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2987 else n = newMat->cmap->n; 2988 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2989 cstart = cend - n; 2990 } else { 2991 cstart = rstart; 2992 cend = rend; 2993 n = cend - cstart; 2994 } 2995 2996 /* loop over local rows, determining number of off diagonal entries */ 2997 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2998 jj = 0; 2999 for (i=0; i<m; i++) { 3000 for (j=0; j<ourlens[i]; j++) { 3001 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3002 jj++; 3003 } 3004 } 3005 3006 for (i=0; i<m; i++) { 3007 ourlens[i] -= offlens[i]; 3008 } 3009 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3010 3011 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3012 3013 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3014 3015 for 
(i=0; i<m; i++) { 3016 ourlens[i] += offlens[i]; 3017 } 3018 3019 if (!rank) { 3020 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3021 3022 /* read in my part of the matrix numerical values */ 3023 nz = procsnz[0]; 3024 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3025 3026 /* insert into matrix */ 3027 jj = rstart; 3028 smycols = mycols; 3029 svals = vals; 3030 for (i=0; i<m; i++) { 3031 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3032 smycols += ourlens[i]; 3033 svals += ourlens[i]; 3034 jj++; 3035 } 3036 3037 /* read in other processors and ship out */ 3038 for (i=1; i<size; i++) { 3039 nz = procsnz[i]; 3040 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3041 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3042 } 3043 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3044 } else { 3045 /* receive numeric values */ 3046 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3047 3048 /* receive message of values*/ 3049 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3050 3051 /* insert into matrix */ 3052 jj = rstart; 3053 smycols = mycols; 3054 svals = vals; 3055 for (i=0; i<m; i++) { 3056 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3057 smycols += ourlens[i]; 3058 svals += ourlens[i]; 3059 jj++; 3060 } 3061 } 3062 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3063 ierr = PetscFree(vals);CHKERRQ(ierr); 3064 ierr = PetscFree(mycols);CHKERRQ(ierr); 3065 ierr = PetscFree(rowners);CHKERRQ(ierr); 3066 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3067 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3068 PetscFunctionReturn(0); 3069 } 3070 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3073 /* TODO: Not scalable because of ISAllGather() unless getting all columns. 
*/
/* Extract a parallel submatrix defined by (isrow, iscol); caches the gathered column IS on the result for MAT_REUSE_MATRIX calls. */
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* reuse the gathered column IS stashed on the output matrix by the MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    /* check if we are grabbing all columns*/
    PetscBool   isstride;
    PetscMPIInt lisstride = 0,gisstride;
    ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
    if (isstride) {
      PetscInt start,len,mstart,mlen;
      ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
      ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
      /* local stride exactly matches this rank's column ownership range */
      if (mstart == start && mlen-mstart == len) lisstride = 1;
    }
    /* MPI_MIN: every rank must see a matching stride for the optimization to apply */
    ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (gisstride) {
      PetscInt N;
      ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
      ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
      ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
    } else {
      /* general case: gather the (unscalable) full column index set on every rank */
      PetscInt cbs;
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
    }
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered IS on the result so MAT_REUSE_MATRIX can find it */
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
  PetscBool      allcolumns, colflag;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* an identity iscol covering every column enables the all-columns fast path */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
  if (colflag && ncol == mat->cmap->N) {
    allcolumns = PETSC_TRUE;
    ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
  } else {
    allcolumns = PETSC_FALSE;
  }
  if (call == MAT_REUSE_MATRIX) {
    /* local sequential copy from the previous call is stashed on *newmat */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr =
MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* rectangular: split columns evenly, leading ranks get the remainder */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m;  /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  /* copy the local sequential submatrix row-by-row into the parallel result */
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
/* Preallocate and fill an MPIAIJ matrix from local CSR arrays (Ii, J, optional v). */
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr =
PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3268 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3269 m = B->rmap->n; 3270 cstart = B->cmap->rstart; 3271 cend = B->cmap->rend; 3272 rstart = B->rmap->rstart; 3273 3274 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3275 3276 #if defined(PETSC_USE_DEBUGGING) 3277 for (i=0; i<m; i++) { 3278 nnz = Ii[i+1]- Ii[i]; 3279 JJ = J + Ii[i]; 3280 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3281 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3282 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3283 } 3284 #endif 3285 3286 for (i=0; i<m; i++) { 3287 nnz = Ii[i+1]- Ii[i]; 3288 JJ = J + Ii[i]; 3289 nnz_max = PetscMax(nnz_max,nnz); 3290 d = 0; 3291 for (j=0; j<nnz; j++) { 3292 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3293 } 3294 d_nnz[i] = d; 3295 o_nnz[i] = nnz - d; 3296 } 3297 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3298 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3299 3300 if (v) values = (PetscScalar*)v; 3301 else { 3302 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3303 } 3304 3305 for (i=0; i<m; i++) { 3306 ii = i + rstart; 3307 nnz = Ii[i+1]- Ii[i]; 3308 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3309 } 3310 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3311 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3312 3313 if (!v) { 3314 ierr = PetscFree(values);CHKERRQ(ierr); 3315 } 3316 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3317 PetscFunctionReturn(0); 3318 } 3319 3320 #undef __FUNCT__ 3321 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3322 /*@ 3323 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3324 (the default parallel PETSc format). 3325 3326 Collective on MPI_Comm 3327 3328 Input Parameters: 3329 + B - the matrix 3330 . i - the indices into j for the start of each local row (starts with zero) 3331 . j - the column indices for each local row (starts with zero) 3332 - v - optional values in the matrix 3333 3334 Level: developer 3335 3336 Notes: 3337 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3338 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3339 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3340 3341 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3342 3343 The format which is used for the sparse matrix input, is equivalent to a 3344 row-major ordering.. 
i.e. for the following matrix, the input data expected is
   as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
/* Thin dispatcher: forwards to the type-specific MatMPIAIJSetPreallocationCSR_C method if registered. */
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.
d_nnz - array containing the number of nonzeros in the various rows of the 3392 DIAGONAL portion of the local submatrix (possibly different for each row) 3393 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3394 The size of this array is equal to the number of local rows, i.e 'm'. 3395 For matrices that will be factored, you must leave room for (and set) 3396 the diagonal entry even if it is zero. 3397 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3398 submatrix (same value is used for all local rows). 3399 - o_nnz - array containing the number of nonzeros in the various rows of the 3400 OFF-DIAGONAL portion of the local submatrix (possibly different for 3401 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3402 structure. The size of this array is equal to the number 3403 of local rows, i.e 'm'. 3404 3405 If the *_nnz parameter is given then the *_nz parameter is ignored 3406 3407 The AIJ format (also called the Yale sparse matrix format or 3408 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3409 storage. The stored row and column indices begin with zero. 3410 See Users-Manual: ch_mat for details. 3411 3412 The parallel matrix is partitioned such that the first m0 rows belong to 3413 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3414 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3415 3416 The DIAGONAL portion of the local submatrix of a processor can be defined 3417 as the submatrix which is obtained by extraction the part corresponding to 3418 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3419 first row that belongs to the processor, r2 is the last row belonging to 3420 the this processor, and c1-c2 is range of indices of the local part of a 3421 vector suitable for applying the matrix to. This is an mxn matrix. 
In the 3422 common case of a square matrix, the row and column ranges are the same and 3423 the DIAGONAL part is also square. The remaining portion of the local 3424 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3425 3426 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3427 3428 You can call MatGetInfo() to get information on how effective the preallocation was; 3429 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3430 You can also run with the option -info and look for messages with the string 3431 malloc in them to see if additional memory allocation was needed. 3432 3433 Example usage: 3434 3435 Consider the following 8x8 matrix with 34 non-zero values, that is 3436 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3437 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3438 as follows: 3439 3440 .vb 3441 1 2 0 | 0 3 0 | 0 4 3442 Proc0 0 5 6 | 7 0 0 | 8 0 3443 9 0 10 | 11 0 0 | 12 0 3444 ------------------------------------- 3445 13 0 14 | 15 16 17 | 0 0 3446 Proc1 0 18 0 | 19 20 21 | 0 0 3447 0 0 0 | 22 23 0 | 24 0 3448 ------------------------------------- 3449 Proc2 25 26 27 | 0 0 28 | 29 0 3450 30 0 0 | 31 32 33 | 0 34 3451 .ve 3452 3453 This can be represented as a collection of submatrices as: 3454 3455 .vb 3456 A B C 3457 D E F 3458 G H I 3459 .ve 3460 3461 Where the submatrices A,B,C are owned by proc0, D,E,F are 3462 owned by proc1, G,H,I are owned by proc2. 3463 3464 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3465 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3466 The 'M','N' parameters are 8,8, and have the same values on all procs. 3467 3468 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3469 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3470 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3471 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3472 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3473 matrix, ans [DF] as another SeqAIJ matrix. 3474 3475 When d_nz, o_nz parameters are specified, d_nz storage elements are 3476 allocated for every row of the local diagonal submatrix, and o_nz 3477 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3478 One way to choose d_nz and o_nz is to use the max nonzerors per local 3479 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3480 In this case, the values of d_nz,o_nz are: 3481 .vb 3482 proc0 : dnz = 2, o_nz = 2 3483 proc1 : dnz = 3, o_nz = 2 3484 proc2 : dnz = 1, o_nz = 4 3485 .ve 3486 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3487 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3488 for proc3. i.e we are using 12+15+10=37 storage locations to store 3489 34 values. 3490 3491 When d_nnz, o_nnz parameters are specified, the storage is specified 3492 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3493 In the above case the values for d_nnz,o_nnz are: 3494 .vb 3495 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3496 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3497 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3498 .ve 3499 Here the space allocated is sum of all the above values i.e 34, and 3500 hence pre-allocation is perfect. 

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* Delegate to the type-specific implementation, if the matrix type provides one;
     a no-op for types that do not register "MatMPIAIJSetPreallocation_C" */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3549 3550 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3551 3552 The format which is used for the sparse matrix input, is equivalent to a 3553 row-major ordering.. i.e for the following matrix, the input data expected is 3554 as shown 3555 3556 $ 1 0 0 3557 $ 2 0 3 P0 3558 $ ------- 3559 $ 4 5 6 P1 3560 $ 3561 $ Process0 [P0]: rows_owned=[0,1] 3562 $ i = {0,1,3} [size = nrow+1 = 2+1] 3563 $ j = {0,0,2} [size = 3] 3564 $ v = {1,2,3} [size = 3] 3565 $ 3566 $ Process1 [P1]: rows_owned=[2] 3567 $ i = {0,3} [size = nrow+1 = 1+1] 3568 $ j = {0,1,2} [size = 3] 3569 $ v = {4,5,6} [size = 3] 3570 3571 .keywords: matrix, aij, compressed row, sparse, parallel 3572 3573 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3574 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3575 @*/ 3576 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3577 { 3578 PetscErrorCode ierr; 3579 3580 PetscFunctionBegin; 3581 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3582 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3583 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3584 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3585 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3586 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3587 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3588 PetscFunctionReturn(0); 3589 } 3590 3591 #undef __FUNCT__ 3592 #define __FUNCT__ "MatCreateAIJ" 3593 /*@C 3594 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3595 (the default parallel PETSc format). 
For good matrix assembly performance 3596 the user should preallocate the matrix storage by setting the parameters 3597 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3598 performance can be increased by more than a factor of 50. 3599 3600 Collective on MPI_Comm 3601 3602 Input Parameters: 3603 + comm - MPI communicator 3604 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3605 This value should be the same as the local size used in creating the 3606 y vector for the matrix-vector product y = Ax. 3607 . n - This value should be the same as the local size used in creating the 3608 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3609 calculated if N is given) For square matrices n is almost always m. 3610 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3611 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3612 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3613 (same value is used for all local rows) 3614 . d_nnz - array containing the number of nonzeros in the various rows of the 3615 DIAGONAL portion of the local submatrix (possibly different for each row) 3616 or NULL, if d_nz is used to specify the nonzero structure. 3617 The size of this array is equal to the number of local rows, i.e 'm'. 3618 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3619 submatrix (same value is used for all local rows). 3620 - o_nnz - array containing the number of nonzeros in the various rows of the 3621 OFF-DIAGONAL portion of the local submatrix (possibly different for 3622 each row) or NULL, if o_nz is used to specify the nonzero 3623 structure. The size of this array is equal to the number 3624 of local rows, i.e 'm'. 3625 3626 Output Parameter: 3627 . 
A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitute the OFF-DIAGONAL portion. The example below better
   illustrates this concept.
3664 3665 For a square global matrix we define each processor's diagonal portion 3666 to be its local rows and the corresponding columns (a square submatrix); 3667 each processor's off-diagonal portion encompasses the remainder of the 3668 local matrix (a rectangular submatrix). 3669 3670 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3671 3672 When calling this routine with a single process communicator, a matrix of 3673 type SEQAIJ is returned. If a matrix of type MATMPIAIJ is desired for this 3674 type of communicator, use the construction mechanism: 3675 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3676 3677 By default, this format uses inodes (identical nodes) when possible. 3678 We search for consecutive rows with the same nonzero structure, thereby 3679 reusing matrix information to achieve increased efficiency. 3680 3681 Options Database Keys: 3682 + -mat_no_inode - Do not use inodes 3683 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3684 - -mat_aij_oneindex - Internally use indexing starting at 1 3685 rather than 0. Note that when calling MatSetValues(), 3686 the user still MUST index entries starting at 0! 3687 3688 3689 Example usage: 3690 3691 Consider the following 8x8 matrix with 34 non-zero values, that is 3692 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3693 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 3694 as follows: 3695 3696 .vb 3697 1 2 0 | 0 3 0 | 0 4 3698 Proc0 0 5 6 | 7 0 0 | 8 0 3699 9 0 10 | 11 0 0 | 12 0 3700 ------------------------------------- 3701 13 0 14 | 15 16 17 | 0 0 3702 Proc1 0 18 0 | 19 20 21 | 0 0 3703 0 0 0 | 22 23 0 | 24 0 3704 ------------------------------------- 3705 Proc2 25 26 27 | 0 0 28 | 29 0 3706 30 0 0 | 31 32 33 | 0 34 3707 .ve 3708 3709 This can be represented as a collection of submatrices as: 3710 3711 .vb 3712 A B C 3713 D E F 3714 G H I 3715 .ve 3716 3717 Where the submatrices A,B,C are owned by proc0, D,E,F are 3718 owned by proc1, G,H,I are owned by proc2. 3719 3720 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3721 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3722 The 'M','N' parameters are 8,8, and have the same values on all procs. 3723 3724 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3725 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3726 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3727 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3728 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3729 matrix, ans [DF] as another SeqAIJ matrix. 3730 3731 When d_nz, o_nz parameters are specified, d_nz storage elements are 3732 allocated for every row of the local diagonal submatrix, and o_nz 3733 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3734 One way to choose d_nz and o_nz is to use the max nonzerors per local 3735 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3736 In this case, the values of d_nz,o_nz are: 3737 .vb 3738 proc0 : dnz = 2, o_nz = 2 3739 proc1 : dnz = 3, o_nz = 2 3740 proc2 : dnz = 1, o_nz = 4 3741 .ve 3742 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3743 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3744 for proc3. 
i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* dispatch on the communicator size: a single-process communicator yields a
     MATSEQAIJ matrix (only d_nz/d_nnz apply), otherwise a MATMPIAIJ matrix */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
/* Accessors for the sequential blocks stored inside a MATMPIAIJ matrix (see body below) */
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg)
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 3795 if (Ad) *Ad = a->A; 3796 if (Ao) *Ao = a->B; 3797 if (colmap) *colmap = a->garray; 3798 PetscFunctionReturn(0); 3799 } 3800 3801 #undef __FUNCT__ 3802 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3803 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3804 { 3805 PetscErrorCode ierr; 3806 PetscInt m,N,i,rstart,nnz,Ii; 3807 PetscInt *indx; 3808 PetscScalar *values; 3809 3810 PetscFunctionBegin; 3811 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3812 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3813 PetscInt *dnz,*onz,sum,bs,cbs; 3814 3815 if (n == PETSC_DECIDE) { 3816 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3817 } 3818 /* Check sum(n) = N */ 3819 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3820 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3821 3822 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3823 rstart -= m; 3824 3825 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3826 for (i=0; i<m; i++) { 3827 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3828 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3829 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3830 } 3831 3832 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3833 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3834 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3835 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3836 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3837 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3838 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3839 } 3840 3841 /* numeric phase */ 3842 ierr = 
MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3843 for (i=0; i<m; i++) { 3844 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3845 Ii = i + rstart; 3846 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3847 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3848 } 3849 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3850 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3851 PetscFunctionReturn(0); 3852 } 3853 3854 #undef __FUNCT__ 3855 #define __FUNCT__ "MatFileSplit" 3856 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3857 { 3858 PetscErrorCode ierr; 3859 PetscMPIInt rank; 3860 PetscInt m,N,i,rstart,nnz; 3861 size_t len; 3862 const PetscInt *indx; 3863 PetscViewer out; 3864 char *name; 3865 Mat B; 3866 const PetscScalar *values; 3867 3868 PetscFunctionBegin; 3869 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3870 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3871 /* Should this be the type of the diagonal block of A? 
*/ 3872 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3873 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3874 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3875 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3876 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3877 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3878 for (i=0; i<m; i++) { 3879 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3880 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3881 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3882 } 3883 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3884 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3885 3886 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3887 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3888 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3889 sprintf(name,"%s.%d",outfile,rank); 3890 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3891 ierr = PetscFree(name);CHKERRQ(ierr); 3892 ierr = MatView(B,out);CHKERRQ(ierr); 3893 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3894 ierr = MatDestroy(&B);CHKERRQ(ierr); 3895 PetscFunctionReturn(0); 3896 } 3897 3898 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3899 #undef __FUNCT__ 3900 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3901 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3902 { 3903 PetscErrorCode ierr; 3904 Mat_Merge_SeqsToMPI *merge; 3905 PetscContainer container; 3906 3907 PetscFunctionBegin; 3908 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3909 if (container) { 3910 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3911 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3912 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3913 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3914 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3915 ierr = 
PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    /* detach the (now freed) merge data from the matrix */
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
/* Numeric phase of summing per-process SeqAIJ matrices into an MPI AIJ matrix:
   uses the Mat_Merge_SeqsToMPI info composed on mpimat (key "MatMergeSeqsToMPI")
   by the symbolic phase to ship off-process rows of seqmat and accumulate both
   local and received values into mpimat. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode       ierr;
  MPI_Comm             comm;
  Mat_SeqAIJ           *a =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt             proc,m;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            *aa=a->a;
  MatScalar            **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI  *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* retrieve the communication pattern computed by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri
= merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* send the seqmat values of the rows owned by [proc] in one contiguous message */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi =
ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    /* merge: both aj and bj_i are sorted, so scan bj_i once to place each aj entry */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *(nextai[k]);
        aa   = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode       ierr;
  Mat                  B_mpi;
  Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt             len,proc,*dnz,*onz,bs,cbs;
  PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request
*si_waits,*sj_waits,*ri_waits,*rj_waits; 4069 MPI_Status *status; 4070 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4071 PetscBT lnkbt; 4072 Mat_Merge_SeqsToMPI *merge; 4073 PetscContainer container; 4074 4075 PetscFunctionBegin; 4076 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4077 4078 /* make sure it is a PETSc comm */ 4079 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4080 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4081 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4082 4083 ierr = PetscNew(&merge);CHKERRQ(ierr); 4084 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4085 4086 /* determine row ownership */ 4087 /*---------------------------------------------------------*/ 4088 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4089 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4090 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4091 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4092 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4093 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4094 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4095 4096 m = merge->rowmap->n; 4097 owners = merge->rowmap->range; 4098 4099 /* determine the number of messages to send, their lengths */ 4100 /*---------------------------------------------------------*/ 4101 len_s = merge->len_s; 4102 4103 len = 0; /* length of buf_si[] */ 4104 merge->nsend = 0; 4105 for (proc=0; proc<size; proc++) { 4106 len_si[proc] = 0; 4107 if (proc == rank) { 4108 len_s[proc] = 0; 4109 } else { 4110 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4111 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4112 } 4113 if (len_s[proc]) { 4114 merge->nsend++; 4115 nrows = 0; 4116 for (i=owners[proc]; i<owners[proc+1]; i++) { 4117 if (ai[i+1] > ai[i]) nrows++; 4118 } 4119 len_si[proc] = 2*(nrows+1); 4120 len += len_si[proc]; 4121 } 4122 } 4123 4124 
/* determine the number and length of messages to receive for ij-structure */ 4125 /*-------------------------------------------------------------------------*/ 4126 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4127 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4128 4129 /* post the Irecv of j-structure */ 4130 /*-------------------------------*/ 4131 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4132 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4133 4134 /* post the Isend of j-structure */ 4135 /*--------------------------------*/ 4136 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4137 4138 for (proc=0, k=0; proc<size; proc++) { 4139 if (!len_s[proc]) continue; 4140 i = owners[proc]; 4141 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4142 k++; 4143 } 4144 4145 /* receives and sends of j-structure are complete */ 4146 /*------------------------------------------------*/ 4147 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4148 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4149 4150 /* send and recv i-structure */ 4151 /*---------------------------*/ 4152 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4153 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4154 4155 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4156 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4157 for (proc=0,k=0; proc<size; proc++) { 4158 if (!len_s[proc]) continue; 4159 /* form outgoing message for i-structure: 4160 buf_si[0]: nrows to be sent 4161 [1:nrows]: row index (global) 4162 [nrows+1:2*nrows+1]: i-structure index 4163 */ 4164 /*-------------------------------------------*/ 
4165 nrows = len_si[proc]/2 - 1; 4166 buf_si_i = buf_si + nrows+1; 4167 buf_si[0] = nrows; 4168 buf_si_i[0] = 0; 4169 nrows = 0; 4170 for (i=owners[proc]; i<owners[proc+1]; i++) { 4171 anzi = ai[i+1] - ai[i]; 4172 if (anzi) { 4173 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4174 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4175 nrows++; 4176 } 4177 } 4178 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4179 k++; 4180 buf_si += len_si[proc]; 4181 } 4182 4183 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4184 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4185 4186 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4187 for (i=0; i<merge->nrecv; i++) { 4188 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4189 } 4190 4191 ierr = PetscFree(len_si);CHKERRQ(ierr); 4192 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4193 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4194 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4195 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4196 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4197 ierr = PetscFree(status);CHKERRQ(ierr); 4198 4199 /* compute a local seq matrix in each processor */ 4200 /*----------------------------------------------*/ 4201 /* allocate bi array and free space for accumulating nonzero column info */ 4202 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4203 bi[0] = 0; 4204 4205 /* create and initialize a linked list */ 4206 nlnk = N+1; 4207 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4208 4209 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4210 len = ai[owners[rank+1]] - ai[owners[rank]]; 4211 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4212 4213 current_space = free_space; 4214 4215 /* determine symbolic info for each 
local row */ 4216 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4217 4218 for (k=0; k<merge->nrecv; k++) { 4219 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4220 nrows = *buf_ri_k[k]; 4221 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4222 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4223 } 4224 4225 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4226 len = 0; 4227 for (i=0; i<m; i++) { 4228 bnzi = 0; 4229 /* add local non-zero cols of this proc's seqmat into lnk */ 4230 arow = owners[rank] + i; 4231 anzi = ai[arow+1] - ai[arow]; 4232 aj = a->j + ai[arow]; 4233 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4234 bnzi += nlnk; 4235 /* add received col data into lnk */ 4236 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4237 if (i == *nextrow[k]) { /* i-th row */ 4238 anzi = *(nextai[k]+1) - *nextai[k]; 4239 aj = buf_rj[k] + *nextai[k]; 4240 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4241 bnzi += nlnk; 4242 nextrow[k]++; nextai[k]++; 4243 } 4244 } 4245 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4246 4247 /* if free space is not available, make more free space */ 4248 if (current_space->local_remaining<bnzi) { 4249 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4250 nspacedouble++; 4251 } 4252 /* copy data into free space, then initialize lnk */ 4253 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4254 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4255 4256 current_space->array += bnzi; 4257 current_space->local_used += bnzi; 4258 current_space->local_remaining -= bnzi; 4259 4260 bi[i+1] = bi[i] + bnzi; 4261 } 4262 4263 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4264 4265 ierr = 
PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4266 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4267 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4268 4269 /* create symbolic parallel matrix B_mpi */ 4270 /*---------------------------------------*/ 4271 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4272 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4273 if (n==PETSC_DECIDE) { 4274 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4275 } else { 4276 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4277 } 4278 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4279 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4280 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4281 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4282 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4283 4284 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4285 B_mpi->assembled = PETSC_FALSE; 4286 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4287 merge->bi = bi; 4288 merge->bj = bj; 4289 merge->buf_ri = buf_ri; 4290 merge->buf_rj = buf_rj; 4291 merge->coi = NULL; 4292 merge->coj = NULL; 4293 merge->owners_co = NULL; 4294 4295 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4296 4297 /* attach the supporting struct to B_mpi for reuse */ 4298 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4299 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4300 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4301 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4302 *mpimat = B_mpi; 4303 4304 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4305 PetscFunctionReturn(0); 4306 } 4307 4308 #undef __FUNCT__ 4309 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4310 /*@C 4311 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ 
   matrix by adding sequential matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.     seqmat - the input sequential matrix (one per process; all must have the same global dimensions)
.     m - number of local rows (or PETSC_DECIDE)
.     n - number of local columns (or PETSC_DECIDE)
-     scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
    destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* uniprocessor case: no merge is needed, simply duplicate or copy the sequential matrix */
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  /* symbolic phase builds the parallel nonzero structure only on the first call;
     the numeric phase (always run) sums the values into mpimat */
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
   MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its
   local rows and putting them into a sequential matrix with
   mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
   with MatGetSize()

   Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

   Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap = mpimat->garray;  /* garray: local off-diag column -> global column */
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscInt       am = A->rmap->n,i,j,k,cstart = A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;
  MPI_Comm       comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* on one process the previously built A_loc aliases the diagonal block; nothing to refresh */
  if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a  = (Mat_SeqAIJ*)(mpimat->A)->data;      /* diagonal block (local columns) */
  b  = (Mat_SeqAIJ*)(mpimat->B)->data;      /* off-diagonal block (compressed columns) */
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    if (size == 1) {
      /* single process: the diagonal block IS the local matrix; wrap its arrays without copying */
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }

    /* row i of the merged matrix holds all nonzeros of row i from both blocks */
    ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
    k    = 0;
    /* merge per row in three pieces so global column indices come out sorted:
       off-diag columns < cstart, then the diagonal block, then off-diag columns >= cstart
       (cmap/garray is sorted, so the split point jo is well defined) */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A, columns left of the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (shift local column index by cstart to get global) */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A, columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already built: only the values need to be refreshed, in the same merge order */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A, columns left of the diagonal block */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A, columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
   MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns

   Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

   Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  /* default row set: all locally owned rows */
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  /* default column set: the columns this process actually touches, i.e.
     the diagonal-block columns plus the off-diagonal columns recorded in garray */
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;          /* sorted global indices of off-diagonal columns */
    nzA   = a->A->cmap->n;      /* number of diagonal-block columns */
    nzB   = a->B->cmap->n;      /* number of off-diagonal columns */
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    /* garray is sorted, so entries < start come first; splitting at imark keeps idx globally sorted */
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;        /* diagonal-block columns */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];      /* off-diagonal columns >= start */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  /* MatGetSubMatrices takes an array of Mat; for MAT_REUSE it must contain the old matrix */
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  /* destroy only the index sets we created ourselves */
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

   Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq = NULL;

  PetscFunctionBegin;
  /* rows of B must be distributed exactly like the columns of A for A*B to make sense */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* build the sorted list of B-rows needed: garray entries below start,
       then A's local (diagonal-block) columns, then the remaining garray entries */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);  /* all columns of B */
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  /* hand the index sets back to the caller for reuse, or destroy them if not requested */
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4635 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4636 4637 Level: developer 4638 4639 */ 4640 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4641 { 4642 VecScatter_MPI_General *gen_to,*gen_from; 4643 PetscErrorCode ierr; 4644 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4645 Mat_SeqAIJ *b_oth; 4646 VecScatter ctx =a->Mvctx; 4647 MPI_Comm comm; 4648 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4649 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4650 PetscInt *rvalues,*svalues; 4651 MatScalar *b_otha,*bufa,*bufA; 4652 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4653 MPI_Request *rwaits = NULL,*swaits = NULL; 4654 MPI_Status *sstatus,rstatus; 4655 PetscMPIInt jj,size; 4656 PetscInt *cols,sbs,rbs; 4657 PetscScalar *vals; 4658 4659 PetscFunctionBegin; 4660 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4661 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4662 4663 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4664 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4665 } 4666 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4667 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4668 4669 gen_to = (VecScatter_MPI_General*)ctx->todata; 4670 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4671 nrecvs = gen_from->n; 4672 nsends = gen_to->n; 4673 4674 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4675 srow = gen_to->indices; /* local row index to be sent */ 4676 sstarts = gen_to->starts; 4677 sprocs = gen_to->procs; 4678 sstatus = gen_to->sstatus; 4679 sbs = gen_to->bs; 4680 rstarts 
= gen_from->starts; 4681 rprocs = gen_from->procs; 4682 rbs = gen_from->bs; 4683 4684 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4685 if (scall == MAT_INITIAL_MATRIX) { 4686 /* i-array */ 4687 /*---------*/ 4688 /* post receives */ 4689 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 4690 for (i=0; i<nrecvs; i++) { 4691 rowlen = rvalues + rstarts[i]*rbs; 4692 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4693 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4694 } 4695 4696 /* pack the outgoing message */ 4697 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4698 4699 sstartsj[0] = 0; 4700 rstartsj[0] = 0; 4701 len = 0; /* total length of j or a array to be sent */ 4702 k = 0; 4703 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 4704 for (i=0; i<nsends; i++) { 4705 rowlen = svalues + sstarts[i]*sbs; 4706 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4707 for (j=0; j<nrows; j++) { 4708 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4709 for (l=0; l<sbs; l++) { 4710 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4711 4712 rowlen[j*sbs+l] = ncols; 4713 4714 len += ncols; 4715 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4716 } 4717 k++; 4718 } 4719 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4720 4721 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4722 } 4723 /* recvs and sends of i-array are completed */ 4724 i = nrecvs; 4725 while (i--) { 4726 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4727 } 4728 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4729 ierr = PetscFree(svalues);CHKERRQ(ierr); 4730 4731 /* allocate buffers for sending j and a arrays */ 4732 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 
4733 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 4734 4735 /* create i-array of B_oth */ 4736 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 4737 4738 b_othi[0] = 0; 4739 len = 0; /* total length of j or a array to be received */ 4740 k = 0; 4741 for (i=0; i<nrecvs; i++) { 4742 rowlen = rvalues + rstarts[i]*rbs; 4743 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4744 for (j=0; j<nrows; j++) { 4745 b_othi[k+1] = b_othi[k] + rowlen[j]; 4746 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 4747 k++; 4748 } 4749 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4750 } 4751 ierr = PetscFree(rvalues);CHKERRQ(ierr); 4752 4753 /* allocate space for j and a arrrays of B_oth */ 4754 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 4755 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 4756 4757 /* j-array */ 4758 /*---------*/ 4759 /* post receives of j-array */ 4760 for (i=0; i<nrecvs; i++) { 4761 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4762 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4763 } 4764 4765 /* pack the outgoing message j-array */ 4766 k = 0; 4767 for (i=0; i<nsends; i++) { 4768 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4769 bufJ = bufj+sstartsj[i]; 4770 for (j=0; j<nrows; j++) { 4771 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4772 for (ll=0; ll<sbs; ll++) { 4773 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4774 for (l=0; l<ncols; l++) { 4775 *bufJ++ = cols[l]; 4776 } 4777 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4778 } 4779 } 4780 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4781 } 4782 4783 /* recvs and sends of j-array are completed */ 4784 i = nrecvs; 4785 while (i--) { 4786 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4787 
} 4788 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4789 } else if (scall == MAT_REUSE_MATRIX) { 4790 sstartsj = *startsj_s; 4791 rstartsj = *startsj_r; 4792 bufa = *bufa_ptr; 4793 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4794 b_otha = b_oth->a; 4795 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4796 4797 /* a-array */ 4798 /*---------*/ 4799 /* post receives of a-array */ 4800 for (i=0; i<nrecvs; i++) { 4801 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4802 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4803 } 4804 4805 /* pack the outgoing message a-array */ 4806 k = 0; 4807 for (i=0; i<nsends; i++) { 4808 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4809 bufA = bufa+sstartsj[i]; 4810 for (j=0; j<nrows; j++) { 4811 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4812 for (ll=0; ll<sbs; ll++) { 4813 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4814 for (l=0; l<ncols; l++) { 4815 *bufA++ = vals[l]; 4816 } 4817 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4818 } 4819 } 4820 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4821 } 4822 /* recvs and sends of a-array are completed */ 4823 i = nrecvs; 4824 while (i--) { 4825 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4826 } 4827 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4828 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4829 4830 if (scall == MAT_INITIAL_MATRIX) { 4831 /* put together the new matrix */ 4832 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4833 4834 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. 
*/ 4835 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4836 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4837 b_oth->free_a = PETSC_TRUE; 4838 b_oth->free_ij = PETSC_TRUE; 4839 b_oth->nonew = 0; 4840 4841 ierr = PetscFree(bufj);CHKERRQ(ierr); 4842 if (!startsj_s || !bufa_ptr) { 4843 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4844 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4845 } else { 4846 *startsj_s = sstartsj; 4847 *startsj_r = rstartsj; 4848 *bufa_ptr = bufa; 4849 } 4850 } 4851 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4852 PetscFunctionReturn(0); 4853 } 4854 4855 #undef __FUNCT__ 4856 #define __FUNCT__ "MatGetCommunicationStructs" 4857 /*@C 4858 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4859 4860 Not Collective 4861 4862 Input Parameters: 4863 . A - The matrix in mpiaij format 4864 4865 Output Parameter: 4866 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4867 . 
colmap - A map from global column index to local index into lvec 4868 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4869 4870 Level: developer 4871 4872 @*/ 4873 #if defined(PETSC_USE_CTABLE) 4874 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4875 #else 4876 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4877 #endif 4878 { 4879 Mat_MPIAIJ *a; 4880 4881 PetscFunctionBegin; 4882 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4883 PetscValidPointer(lvec, 2); 4884 PetscValidPointer(colmap, 3); 4885 PetscValidPointer(multScatter, 4); 4886 a = (Mat_MPIAIJ*) A->data; 4887 if (lvec) *lvec = a->lvec; 4888 if (colmap) *colmap = a->colmap; 4889 if (multScatter) *multScatter = a->Mvctx; 4890 PetscFunctionReturn(0); 4891 } 4892 4893 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4894 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4895 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4896 #if defined(PETSC_HAVE_ELEMENTAL) 4897 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4898 #endif 4899 #if defined(PETSC_HAVE_HYPRE) 4900 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 4901 #endif 4902 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*); 4903 4904 #undef __FUNCT__ 4905 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4906 /* 4907 Computes (B'*A')' since computing B*A directly is untenable 4908 4909 n p p 4910 ( ) ( ) ( ) 4911 m ( A ) * n ( B ) = m ( C ) 4912 ( ) ( ) ( ) 4913 4914 */ 4915 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4916 { 4917 PetscErrorCode ierr; 4918 Mat At,Bt,Ct; 4919 4920 PetscFunctionBegin; 4921 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4922 ierr = 
MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4923 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4924 ierr = MatDestroy(&At);CHKERRQ(ierr); 4925 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4926 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4927 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4928 PetscFunctionReturn(0); 4929 } 4930 4931 #undef __FUNCT__ 4932 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4933 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4934 { 4935 PetscErrorCode ierr; 4936 PetscInt m=A->rmap->n,n=B->cmap->n; 4937 Mat Cmat; 4938 4939 PetscFunctionBegin; 4940 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4941 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 4942 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4943 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 4944 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4945 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 4946 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4947 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4948 4949 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 4950 4951 *C = Cmat; 4952 PetscFunctionReturn(0); 4953 } 4954 4955 /* ----------------------------------------------------------------*/ 4956 #undef __FUNCT__ 4957 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4958 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4959 { 4960 PetscErrorCode ierr; 4961 4962 PetscFunctionBegin; 4963 if (scall == MAT_INITIAL_MATRIX) { 4964 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4965 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4966 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4967 } 4968 
ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4969 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4970 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4971 PetscFunctionReturn(0); 4972 } 4973 4974 /*MC 4975 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4976 4977 Options Database Keys: 4978 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4979 4980 Level: beginner 4981 4982 .seealso: MatCreateAIJ() 4983 M*/ 4984 4985 #undef __FUNCT__ 4986 #define __FUNCT__ "MatCreate_MPIAIJ" 4987 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 4988 { 4989 Mat_MPIAIJ *b; 4990 PetscErrorCode ierr; 4991 PetscMPIInt size; 4992 4993 PetscFunctionBegin; 4994 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 4995 4996 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 4997 B->data = (void*)b; 4998 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4999 B->assembled = PETSC_FALSE; 5000 B->insertmode = NOT_SET_VALUES; 5001 b->size = size; 5002 5003 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5004 5005 /* build cache for off array entries formed */ 5006 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5007 5008 b->donotstash = PETSC_FALSE; 5009 b->colmap = 0; 5010 b->garray = 0; 5011 b->roworiented = PETSC_TRUE; 5012 5013 /* stuff used for matrix vector multiply */ 5014 b->lvec = NULL; 5015 b->Mvctx = NULL; 5016 5017 /* stuff for MatGetRow() */ 5018 b->rowindices = 0; 5019 b->rowvalues = 0; 5020 b->getrowactive = PETSC_FALSE; 5021 5022 /* flexible pointer used in CUSP/CUSPARSE classes */ 5023 b->spptr = NULL; 5024 5025 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5026 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  /* conversions to sibling AIJ-based formats */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
  /* dense*AIJ product kernels, looked up by MatMatMult() dispatch */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@C
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly.
       It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  /* validate the caller-supplied CSR structure before wiring it in */
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  /* preallocation is skipped because the arrays below become the storage directly */
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* wrap the caller's arrays (no copy) as the sequential diagonal (A) and off-diagonal (B) blocks */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr =
MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* the user's arrays cannot grow, so flag any new nonzero location as an error */
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* map the C symbol to the Fortran-mangled name expected by the linker */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so can be used in void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
/*
   Fortran-callable fast path for MatSetValues() on a MATMPIAIJ matrix;
   all scalar arguments arrive by reference, per Fortran calling convention.
*/
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt cstart = mat->cmap->rstart,cend =
mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    /* scratch state consumed by the MatSetValues_SeqAIJ_{A,B}_Private macros */
    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;   /* negative row index means "skip this row" */
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        /* locally-owned row: prime the macro cursors for the diagonal (1) and off-diagonal (2) blocks */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column owned locally: insert into the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            /* off-diagonal column */
            if (mat->was_assembled) {
5226 if (!aij->colmap) { 5227 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5228 } 5229 #if defined(PETSC_USE_CTABLE) 5230 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5231 col--; 5232 #else 5233 col = aij->colmap[in[j]] - 1; 5234 #endif 5235 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5236 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5237 col = in[j]; 5238 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5239 B = aij->B; 5240 b = (Mat_SeqAIJ*)B->data; 5241 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5242 rp2 = bj + bi[row]; 5243 ap2 = ba + bi[row]; 5244 rmax2 = bimax[row]; 5245 nrow2 = bilen[row]; 5246 low2 = 0; 5247 high2 = nrow2; 5248 bm = aij->B->rmap->n; 5249 ba = b->a; 5250 } 5251 } else col = in[j]; 5252 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5253 } 5254 } 5255 } else if (!aij->donotstash) { 5256 if (roworiented) { 5257 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5258 } else { 5259 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5260 } 5261 } 5262 } 5263 } 5264 PetscFunctionReturnVoid(); 5265 } 5266 5267