#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSizes_MPIAIJ"
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
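/*
   A minimal usage sketch for the MATAIJ manual page above: create an AIJ matrix and,
   as recommended there, call both preallocation routines so the same code runs with one
   or many processes.  The global size 100 and the per-row counts 5 and 2 are
   illustrative assumptions, not values required by this file.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/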
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
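/*
   A short usage sketch for MatGetColumnNorms() as implemented above; the NORM_2 choice
   and the assembled matrix A are illustrative assumptions.  Note that norms must have
   room for one entry per *global* column on every process.

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/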
#undef __FUNCT__
#define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore = 0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
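/*
   A minimal sketch, assuming a square MATSEQAIJ matrix gseq that is significant on
   rank 0 only and a chosen local row count mlocal on each process, of how a
   preconditioner might call the routine above (the names gseq and mlocal are
   illustrative, not from this file):

     Mat dist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     (later, with the same nonzero layout, refresh only the numerical values from rank 0)
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/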
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
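/*
   To make the colmap convention above concrete, this is how the rest of this file looks
   up the local off-diagonal column for a global column gcol (a sketch assembled from the
   lookups in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below; values are stored
   shifted by one so that 0 can mean "not present"):

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
     lcol < 0 now signals that gcol has no slot in the off-diagonal B part.
*/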
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A     = aij->A;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa   = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B     = aij->B;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba   = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
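/*
   A minimal sketch of the calling sequence that ends up in MatSetValues_MPIAIJ() above;
   the single inserted entry and the ADD_VALUES mode are illustrative assumptions.
   Off-process entries are stashed and only moved during assembly, so the Begin/End pair
   is required before the matrix can be used.

     PetscInt    i = 0, j = 0;
     PetscScalar val = 1.0;
     ierr = MatSetValues(A,1,&i,1,&j,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/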
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  PetscInt       *lrows;
  PetscInt       r,len;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
  ierr = MatZeroRows(mat->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
    PetscBool cong;
    ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
    if (cong) A->congruentlayouts = 1;
    else      A->congruentlayouts = 0;
  }
  if ((diag != 0.0) && A->congruentlayouts) {
    ierr = MatZeroRows(mat->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n,&lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N,&rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf,n,N,NULL,PETSC_OWN_POINTER,rrows,PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf,MPIU_INT,(PetscInt*)rows,lrows,MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,(PetscInt*)rows,lrows,MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
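/*
   A small sketch of the communication pattern the two multiply routines above rely on,
   as a caller would see it; A, x and y are assumed to be an assembled MPIAIJ matrix and
   conforming vectors.  MatMult() overlaps the forward scatter of x with the local
   diagonal-block product; MatMultTranspose() uses a reverse scatter to add the
   off-process contributions of y = A^T x.

     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = MatMultTranspose(A,y,x);CHKERRQ(ierr);
*/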
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
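/*
   A usage sketch for MatGetDiagonal() as restricted by the comment above (square
   matrices with matching row and column layouts); obtaining the vector from
   MatCreateVecs() guarantees a conforming layout.  A is an assumed assembled
   MPIAIJ matrix.

     Vec d;
     ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
     ierr = VecDestroy(&d);CHKERRQ(ierr);
*/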
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range = 0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
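/*
   A minimal sketch of exercising the binary writer above from user code; the file name
   "A.dat" is an illustrative assumption.  A matrix saved this way can be read back with
   MatLoad(), which also picks up the -matload_block_size hint written to the .info file.

     PetscViewer bv;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&bv);CHKERRQ(ierr);
     ierr = MatView(A,bv);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&bv);CHKERRQ(ierr);
*/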
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto the first process */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
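/*
   A short sketch of how MatView_MPIAIJ() above is typically reached; the choice of the
   stdout and draw world viewers is an illustrative assumption.

     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
*/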
1468 { 1469 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1470 PetscErrorCode ierr; 1471 Vec bb1 = 0; 1472 PetscBool hasop; 1473 1474 PetscFunctionBegin; 1475 if (flag == SOR_APPLY_UPPER) { 1476 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1477 PetscFunctionReturn(0); 1478 } 1479 1480 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1481 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1482 } 1483 1484 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1485 if (flag & SOR_ZERO_INITIAL_GUESS) { 1486 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1487 its--; 1488 } 1489 1490 while (its--) { 1491 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1492 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1493 1494 /* update rhs: bb1 = bb - B*x */ 1495 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1496 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1497 1498 /* local sweep */ 1499 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1500 } 1501 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1502 if (flag & SOR_ZERO_INITIAL_GUESS) { 1503 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1504 its--; 1505 } 1506 while (its--) { 1507 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1508 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1509 1510 /* update rhs: bb1 = bb - B*x */ 1511 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1512 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1513 1514 /* local sweep */ 1515 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1516 } 1517 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1518 if (flag & SOR_ZERO_INITIAL_GUESS) { 1519 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1520 its--; 1521 } 1522 while (its--) { 1523 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1524 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1525 1526 /* update rhs: bb1 = bb - B*x */ 1527 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1528 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1529 1530 /* local sweep */ 1531 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1532 } 1533 } else if (flag & SOR_EISENSTAT) { 1534 Vec xx1; 1535 1536 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1537 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1538 1539 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1540 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1541 if (!mat->diag) { 1542 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1543 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1544 } 1545 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1546 if (hasop) { 1547 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1548 } else { 1549 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1550 } 1551 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 
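    /* bb1 now holds bb + ((omega-2)/omega)*D*x, with D the diagonal (block) of
       the local part; the MatMultAdd() below folds in the off-process coupling
       B*lvec before the final local forward solve */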
1552 1553 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1554 1555 /* local sweep */ 1556 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1557 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1558 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1559 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1560 1561 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1562 1563 matin->factorerrortype = mat->A->factorerrortype; 1564 PetscFunctionReturn(0); 1565 } 1566 1567 #undef __FUNCT__ 1568 #define __FUNCT__ "MatPermute_MPIAIJ" 1569 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1570 { 1571 Mat aA,aB,Aperm; 1572 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1573 PetscScalar *aa,*ba; 1574 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1575 PetscSF rowsf,sf; 1576 IS parcolp = NULL; 1577 PetscBool done; 1578 PetscErrorCode ierr; 1579 1580 PetscFunctionBegin; 1581 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1582 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1583 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1584 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1585 1586 /* Invert row permutation to find out where my rows should go */ 1587 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1588 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1589 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1590 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1591 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1592 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1593 1594 /* Invert column permutation to find out where my columns should go */ 1595 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1596 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1597 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1598 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1599 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1600 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1601 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1602 1603 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1604 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1605 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1606 1607 /* Find out where my gcols should go */ 1608 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1609 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1610 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1611 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1612 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1613 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1614 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1615 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1616 1617 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1618 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1619 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1620 for (i=0; i<m; i++) { 1621 PetscInt row = rdest[i],rowner; 1622 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1623 for 
(j=ai[i]; j<ai[i+1]; j++) { 1624 PetscInt cowner,col = cdest[aj[j]]; 1625 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1626 if (rowner == cowner) dnnz[i]++; 1627 else onnz[i]++; 1628 } 1629 for (j=bi[i]; j<bi[i+1]; j++) { 1630 PetscInt cowner,col = gcdest[bj[j]]; 1631 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1632 if (rowner == cowner) dnnz[i]++; 1633 else onnz[i]++; 1634 } 1635 } 1636 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1637 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1638 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1639 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1640 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1641 1642 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1643 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1644 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1645 for (i=0; i<m; i++) { 1646 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1647 PetscInt j0,rowlen; 1648 rowlen = ai[i+1] - ai[i]; 1649 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1650 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1651 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1652 } 1653 rowlen = bi[i+1] - bi[i]; 1654 for (j0=j=0; j<rowlen; j0=j) { 1655 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1656 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1657 } 1658 } 1659 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1660 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1661 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1662 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1663 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1664 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1665 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1666 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1667 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1668 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1669 *B = Aperm; 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #undef __FUNCT__ 1674 #define __FUNCT__ "MatGetGhosts_MPIAIJ" 1675 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1676 { 1677 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1678 PetscErrorCode ierr; 1679 1680 PetscFunctionBegin; 1681 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1682 if (ghosts) *ghosts = aij->garray; 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1688 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1689 { 1690 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1691 Mat A = mat->A,B = mat->B; 1692 PetscErrorCode ierr; 1693 PetscReal isend[5],irecv[5]; 1694 1695 PetscFunctionBegin; 1696 info->block_size = 1.0; 1697 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1698 1699 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1700 isend[3] = info->memory; isend[4] = info->mallocs; 1701 1702 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1703 1704 isend[0] += 
info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1705 isend[3] += info->memory; isend[4] += info->mallocs; 1706 if (flag == MAT_LOCAL) { 1707 info->nz_used = isend[0]; 1708 info->nz_allocated = isend[1]; 1709 info->nz_unneeded = isend[2]; 1710 info->memory = isend[3]; 1711 info->mallocs = isend[4]; 1712 } else if (flag == MAT_GLOBAL_MAX) { 1713 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1714 1715 info->nz_used = irecv[0]; 1716 info->nz_allocated = irecv[1]; 1717 info->nz_unneeded = irecv[2]; 1718 info->memory = irecv[3]; 1719 info->mallocs = irecv[4]; 1720 } else if (flag == MAT_GLOBAL_SUM) { 1721 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1722 1723 info->nz_used = irecv[0]; 1724 info->nz_allocated = irecv[1]; 1725 info->nz_unneeded = irecv[2]; 1726 info->memory = irecv[3]; 1727 info->mallocs = irecv[4]; 1728 } 1729 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1730 info->fill_ratio_needed = 0; 1731 info->factor_mallocs = 0; 1732 PetscFunctionReturn(0); 1733 } 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "MatSetOption_MPIAIJ" 1737 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1738 { 1739 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1740 PetscErrorCode ierr; 1741 1742 PetscFunctionBegin; 1743 switch (op) { 1744 case MAT_NEW_NONZERO_LOCATIONS: 1745 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1746 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1747 case MAT_KEEP_NONZERO_PATTERN: 1748 case MAT_NEW_NONZERO_LOCATION_ERR: 1749 case MAT_USE_INODES: 1750 case MAT_IGNORE_ZERO_ENTRIES: 1751 MatCheckPreallocated(A,1); 1752 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1753 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1754 break; 1755 case MAT_ROW_ORIENTED: 1756 MatCheckPreallocated(A,1); 1757 a->roworiented = flg; 1758 1759 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1760 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1761 break; 1762 case MAT_NEW_DIAGONALS: 1763 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1764 break; 1765 case MAT_IGNORE_OFF_PROC_ENTRIES: 1766 a->donotstash = flg; 1767 break; 1768 case MAT_SPD: 1769 A->spd_set = PETSC_TRUE; 1770 A->spd = flg; 1771 if (flg) { 1772 A->symmetric = PETSC_TRUE; 1773 A->structurally_symmetric = PETSC_TRUE; 1774 A->symmetric_set = PETSC_TRUE; 1775 A->structurally_symmetric_set = PETSC_TRUE; 1776 } 1777 break; 1778 case MAT_SYMMETRIC: 1779 MatCheckPreallocated(A,1); 1780 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1781 break; 1782 case MAT_STRUCTURALLY_SYMMETRIC: 1783 MatCheckPreallocated(A,1); 1784 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1785 break; 1786 case MAT_HERMITIAN: 1787 MatCheckPreallocated(A,1); 1788 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1789 break; 1790 case MAT_SYMMETRY_ETERNAL: 1791 MatCheckPreallocated(A,1); 1792 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1793 break; 1794 case MAT_SUBMAT_SINGLEIS: 1795 A->submat_singleis = flg; 1796 break; 1797 default: 1798 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1799 } 1800 PetscFunctionReturn(0); 1801 } 1802 1803 #undef __FUNCT__ 1804 #define __FUNCT__ "MatGetRow_MPIAIJ" 1805 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1806 { 1807 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1808 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1809 PetscErrorCode ierr; 1810 PetscInt 
i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1811 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1812 PetscInt *cmap,*idx_p; 1813 1814 PetscFunctionBegin; 1815 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1816 mat->getrowactive = PETSC_TRUE; 1817 1818 if (!mat->rowvalues && (idx || v)) { 1819 /* 1820 allocate enough space to hold information from the longest row. 1821 */ 1822 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1823 PetscInt max = 1,tmp; 1824 for (i=0; i<matin->rmap->n; i++) { 1825 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1826 if (max < tmp) max = tmp; 1827 } 1828 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1829 } 1830 1831 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1832 lrow = row - rstart; 1833 1834 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1835 if (!v) {pvA = 0; pvB = 0;} 1836 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1837 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1838 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1839 nztot = nzA + nzB; 1840 1841 cmap = mat->garray; 1842 if (v || idx) { 1843 if (nztot) { 1844 /* Sort by increasing column numbers, assuming A and B already sorted */ 1845 PetscInt imark = -1; 1846 if (v) { 1847 *v = v_p = mat->rowvalues; 1848 for (i=0; i<nzB; i++) { 1849 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1850 else break; 1851 } 1852 imark = i; 1853 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1854 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1855 } 1856 if (idx) { 1857 *idx = idx_p = mat->rowindices; 1858 if (imark > -1) { 1859 for (i=0; i<imark; i++) { 1860 idx_p[i] = cmap[cworkB[i]]; 1861 } 1862 } else { 1863 for (i=0; i<nzB; i++) { 1864 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1865 else break; 1866 } 1867 imark = i; 1868 } 1869 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1870 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1871 } 1872 } else { 1873 if (idx) *idx = 0; 1874 if (v) *v = 0; 1875 } 1876 } 1877 *nz = nztot; 1878 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1879 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1880 PetscFunctionReturn(0); 1881 } 1882 1883 #undef __FUNCT__ 1884 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1885 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1886 { 1887 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1888 1889 PetscFunctionBegin; 1890 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1891 aij->getrowactive = PETSC_FALSE; 1892 PetscFunctionReturn(0); 1893 } 1894 1895 #undef __FUNCT__ 1896 #define __FUNCT__ "MatNorm_MPIAIJ" 1897 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1898 { 1899 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1900 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1901 PetscErrorCode ierr; 1902 PetscInt i,j,cstart = mat->cmap->rstart; 1903 PetscReal sum = 0.0; 1904 MatScalar *v; 1905 1906 PetscFunctionBegin; 1907 if (aij->size == 1) { 1908 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1909 } else { 1910 if (type == NORM_FROBENIUS) { 1911 v = amat->a; 1912 for (i=0; i<amat->nz; i++) { 1913 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 
1914 } 1915 v = bmat->a; 1916 for (i=0; i<bmat->nz; i++) { 1917 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1918 } 1919 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1920 *norm = PetscSqrtReal(*norm); 1921 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1922 } else if (type == NORM_1) { /* max column norm */ 1923 PetscReal *tmp,*tmp2; 1924 PetscInt *jj,*garray = aij->garray; 1925 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1926 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1927 *norm = 0.0; 1928 v = amat->a; jj = amat->j; 1929 for (j=0; j<amat->nz; j++) { 1930 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1931 } 1932 v = bmat->a; jj = bmat->j; 1933 for (j=0; j<bmat->nz; j++) { 1934 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1935 } 1936 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1937 for (j=0; j<mat->cmap->N; j++) { 1938 if (tmp2[j] > *norm) *norm = tmp2[j]; 1939 } 1940 ierr = PetscFree(tmp);CHKERRQ(ierr); 1941 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1942 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1943 } else if (type == NORM_INFINITY) { /* max row norm */ 1944 PetscReal ntemp = 0.0; 1945 for (j=0; j<aij->A->rmap->n; j++) { 1946 v = amat->a + amat->i[j]; 1947 sum = 0.0; 1948 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1949 sum += PetscAbsScalar(*v); v++; 1950 } 1951 v = bmat->a + bmat->i[j]; 1952 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1953 sum += PetscAbsScalar(*v); v++; 1954 } 1955 if (sum > ntemp) ntemp = sum; 1956 } 1957 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1958 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1959 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1960 } 1961 PetscFunctionReturn(0); 1962 } 1963 1964 #undef __FUNCT__ 1965 #define __FUNCT__ "MatTranspose_MPIAIJ" 1966 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1967 { 1968 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1969 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1970 PetscErrorCode ierr; 1971 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1972 PetscInt cstart = A->cmap->rstart,ncol; 1973 Mat B; 1974 MatScalar *array; 1975 1976 PetscFunctionBegin; 1977 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1978 1979 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1980 ai = Aloc->i; aj = Aloc->j; 1981 bi = Bloc->i; bj = Bloc->j; 1982 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1983 PetscInt *d_nnz,*g_nnz,*o_nnz; 1984 PetscSFNode *oloc; 1985 PETSC_UNUSED PetscSF sf; 1986 1987 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1988 /* compute d_nnz for preallocation */ 1989 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1990 for (i=0; i<ai[ma]; i++) { 1991 d_nnz[aj[i]]++; 1992 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1993 } 1994 /* compute local off-diagonal contributions */ 1995 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1996 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1997 /* map those to global */ 1998 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1999 ierr = 
PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2000 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2001 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2002 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2003 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2004 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2005 2006 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2007 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2008 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2009 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2010 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2011 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2012 } else { 2013 B = *matout; 2014 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2015 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2016 } 2017 2018 /* copy over the A part */ 2019 array = Aloc->a; 2020 row = A->rmap->rstart; 2021 for (i=0; i<ma; i++) { 2022 ncol = ai[i+1]-ai[i]; 2023 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2024 row++; 2025 array += ncol; aj += ncol; 2026 } 2027 aj = Aloc->j; 2028 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2029 2030 /* copy over the B part */ 2031 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2032 array = Bloc->a; 2033 row = A->rmap->rstart; 2034 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2035 cols_tmp = cols; 2036 for (i=0; i<mb; i++) { 2037 ncol = bi[i+1]-bi[i]; 2038 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2039 row++; 2040 array += ncol; cols_tmp += ncol; 2041 } 2042 ierr = PetscFree(cols);CHKERRQ(ierr); 2043 2044 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2045 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2046 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2047 *matout = B; 2048 } else { 2049 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2050 } 2051 PetscFunctionReturn(0); 2052 } 2053 2054 #undef __FUNCT__ 2055 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2056 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2057 { 2058 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2059 Mat a = aij->A,b = aij->B; 2060 PetscErrorCode ierr; 2061 PetscInt s1,s2,s3; 2062 2063 PetscFunctionBegin; 2064 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2065 if (rr) { 2066 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2067 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2068 /* Overlap communication with computation. 
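       The scatter of rr into lvec is started here; the left scaling of the
       off-diagonal block and the scaling of the diagonal block proceed while it
       is in flight, and VecScatterEnd() is called only once the scattered
       values are actually needed to right-scale the off-diagonal block.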
*/ 2069 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2070 } 2071 if (ll) { 2072 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2073 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2074 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2075 } 2076 /* scale the diagonal block */ 2077 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2078 2079 if (rr) { 2080 /* Do a scatter end and then right scale the off-diagonal block */ 2081 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2082 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2083 } 2084 PetscFunctionReturn(0); 2085 } 2086 2087 #undef __FUNCT__ 2088 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2089 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2090 { 2091 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2092 PetscErrorCode ierr; 2093 2094 PetscFunctionBegin; 2095 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2096 PetscFunctionReturn(0); 2097 } 2098 2099 #undef __FUNCT__ 2100 #define __FUNCT__ "MatEqual_MPIAIJ" 2101 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2102 { 2103 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2104 Mat a,b,c,d; 2105 PetscBool flg; 2106 PetscErrorCode ierr; 2107 2108 PetscFunctionBegin; 2109 a = matA->A; b = matA->B; 2110 c = matB->A; d = matB->B; 2111 2112 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2113 if (flg) { 2114 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2115 } 2116 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2117 PetscFunctionReturn(0); 2118 } 2119 2120 #undef __FUNCT__ 2121 #define __FUNCT__ "MatCopy_MPIAIJ" 2122 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2123 { 2124 PetscErrorCode ierr; 2125 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2126 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2127 2128 PetscFunctionBegin; 2129 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2130 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2131 /* because of the column compression in the off-processor part of the matrix a->B, 2132 the number of columns in a->B and b->B may be different, hence we cannot call 2133 the MatCopy() directly on the two parts. If need be, we can provide a more 2134 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2135 then copying the submatrices */ 2136 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2137 } else { 2138 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2139 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2140 } 2141 PetscFunctionReturn(0); 2142 } 2143 2144 #undef __FUNCT__ 2145 #define __FUNCT__ "MatSetUp_MPIAIJ" 2146 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2147 { 2148 PetscErrorCode ierr; 2149 2150 PetscFunctionBegin; 2151 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2152 PetscFunctionReturn(0); 2153 } 2154 2155 /* 2156 Computes the number of nonzeros per row needed for preallocation when X and Y 2157 have different nonzero structure. 
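   The count for row i is the size of the sorted union of the two rows' global
   column indices: e.g. if row i of X has global columns {0,3,7} and row i of Y
   has {3,5}, the union {0,3,5,7} gives nnz[i] = 4.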
2158 */ 2159 #undef __FUNCT__ 2160 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2161 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2162 { 2163 PetscInt i,j,k,nzx,nzy; 2164 2165 PetscFunctionBegin; 2166 /* Set the number of nonzeros in the new matrix */ 2167 for (i=0; i<m; i++) { 2168 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2169 nzx = xi[i+1] - xi[i]; 2170 nzy = yi[i+1] - yi[i]; 2171 nnz[i] = 0; 2172 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2173 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2174 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2175 nnz[i]++; 2176 } 2177 for (; k<nzy; k++) nnz[i]++; 2178 } 2179 PetscFunctionReturn(0); 2180 } 2181 2182 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2183 #undef __FUNCT__ 2184 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2185 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2186 { 2187 PetscErrorCode ierr; 2188 PetscInt m = Y->rmap->N; 2189 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2190 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2191 2192 PetscFunctionBegin; 2193 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2194 PetscFunctionReturn(0); 2195 } 2196 2197 #undef __FUNCT__ 2198 #define __FUNCT__ "MatAXPY_MPIAIJ" 2199 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2200 { 2201 PetscErrorCode ierr; 2202 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2203 PetscBLASInt bnz,one=1; 2204 Mat_SeqAIJ *x,*y; 2205 2206 PetscFunctionBegin; 2207 if (str == SAME_NONZERO_PATTERN) { 2208 PetscScalar alpha = a; 2209 x = (Mat_SeqAIJ*)xx->A->data; 2210 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2211 y = (Mat_SeqAIJ*)yy->A->data; 2212 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2213 x = (Mat_SeqAIJ*)xx->B->data; 2214 y = (Mat_SeqAIJ*)yy->B->data; 2215 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2216 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2217 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2218 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2219 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2220 } else { 2221 Mat B; 2222 PetscInt *nnz_d,*nnz_o; 2223 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2224 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2225 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2226 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2227 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2228 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2229 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2230 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2231 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2232 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2233 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2234 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2235 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2236 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2237 } 2238 
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) { /* loop runs over the local rows */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  /* VecCreateSeq() requires a communicator of size one */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
2453 Mat *dummy; 2454 2455 PetscFunctionBegin; 2456 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2457 *newmat = *dummy; 2458 ierr = PetscFree(dummy);CHKERRQ(ierr); 2459 PetscFunctionReturn(0); 2460 } 2461 2462 #undef __FUNCT__ 2463 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2464 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2465 { 2466 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2467 PetscErrorCode ierr; 2468 2469 PetscFunctionBegin; 2470 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2471 A->factorerrortype = a->A->factorerrortype; 2472 PetscFunctionReturn(0); 2473 } 2474 2475 #undef __FUNCT__ 2476 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2477 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2478 { 2479 PetscErrorCode ierr; 2480 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2481 2482 PetscFunctionBegin; 2483 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2484 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2485 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2486 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2487 PetscFunctionReturn(0); 2488 } 2489 2490 #undef __FUNCT__ 2491 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ" 2492 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2493 { 2494 PetscFunctionBegin; 2495 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2496 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2497 PetscFunctionReturn(0); 2498 } 2499 2500 #undef __FUNCT__ 2501 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap" 2502 /*@ 2503 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2504 2505 Collective on Mat 2506 2507 Input Parameters: 2508 + A - the matrix 2509 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2510 2511 Level: advanced 2512 2513 @*/ 2514 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2515 { 2516 PetscErrorCode ierr; 2517 2518 PetscFunctionBegin; 2519 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 #undef __FUNCT__ 2524 #define __FUNCT__ "MatSetFromOptions_MPIAIJ" 2525 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2526 { 2527 PetscErrorCode ierr; 2528 PetscBool sc = PETSC_FALSE,flg; 2529 2530 PetscFunctionBegin; 2531 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2532 ierr = PetscObjectOptionsBegin((PetscObject)A); 2533 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2534 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2535 if (flg) { 2536 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2537 } 2538 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2539 PetscFunctionReturn(0); 2540 } 2541 2542 #undef __FUNCT__ 2543 #define __FUNCT__ "MatShift_MPIAIJ" 2544 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2545 { 2546 PetscErrorCode ierr; 2547 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2548 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2549 2550 PetscFunctionBegin; 2551 if (!Y->preallocated) { 2552 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2553 } else if 
(!aij->nz) { 2554 PetscInt nonew = aij->nonew; 2555 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2556 aij->nonew = nonew; 2557 } 2558 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2559 PetscFunctionReturn(0); 2560 } 2561 2562 #undef __FUNCT__ 2563 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ" 2564 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2565 { 2566 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2567 PetscErrorCode ierr; 2568 2569 PetscFunctionBegin; 2570 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2571 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2572 if (d) { 2573 PetscInt rstart; 2574 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2575 *d += rstart; 2576 2577 } 2578 PetscFunctionReturn(0); 2579 } 2580 2581 2582 /* -------------------------------------------------------------------*/ 2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2584 MatGetRow_MPIAIJ, 2585 MatRestoreRow_MPIAIJ, 2586 MatMult_MPIAIJ, 2587 /* 4*/ MatMultAdd_MPIAIJ, 2588 MatMultTranspose_MPIAIJ, 2589 MatMultTransposeAdd_MPIAIJ, 2590 0, 2591 0, 2592 0, 2593 /*10*/ 0, 2594 0, 2595 0, 2596 MatSOR_MPIAIJ, 2597 MatTranspose_MPIAIJ, 2598 /*15*/ MatGetInfo_MPIAIJ, 2599 MatEqual_MPIAIJ, 2600 MatGetDiagonal_MPIAIJ, 2601 MatDiagonalScale_MPIAIJ, 2602 MatNorm_MPIAIJ, 2603 /*20*/ MatAssemblyBegin_MPIAIJ, 2604 MatAssemblyEnd_MPIAIJ, 2605 MatSetOption_MPIAIJ, 2606 MatZeroEntries_MPIAIJ, 2607 /*24*/ MatZeroRows_MPIAIJ, 2608 0, 2609 0, 2610 0, 2611 0, 2612 /*29*/ MatSetUp_MPIAIJ, 2613 0, 2614 0, 2615 MatGetDiagonalBlock_MPIAIJ, 2616 0, 2617 /*34*/ MatDuplicate_MPIAIJ, 2618 0, 2619 0, 2620 0, 2621 0, 2622 /*39*/ MatAXPY_MPIAIJ, 2623 MatGetSubMatrices_MPIAIJ, 2624 MatIncreaseOverlap_MPIAIJ, 2625 MatGetValues_MPIAIJ, 2626 MatCopy_MPIAIJ, 2627 /*44*/ MatGetRowMax_MPIAIJ, 2628 MatScale_MPIAIJ, 2629 MatShift_MPIAIJ, 2630 MatDiagonalSet_MPIAIJ, 2631 MatZeroRowsColumns_MPIAIJ, 2632 /*49*/ MatSetRandom_MPIAIJ, 2633 0, 2634 0, 2635 0, 2636 0, 2637 /*54*/ MatFDColoringCreate_MPIXAIJ, 2638 0, 2639 MatSetUnfactored_MPIAIJ, 2640 MatPermute_MPIAIJ, 2641 0, 2642 /*59*/ MatGetSubMatrix_MPIAIJ, 2643 MatDestroy_MPIAIJ, 2644 MatView_MPIAIJ, 2645 0, 2646 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2647 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2648 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2649 0, 2650 0, 2651 0, 2652 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2653 MatGetRowMinAbs_MPIAIJ, 2654 0, 2655 0, 2656 0, 2657 0, 2658 /*75*/ MatFDColoringApply_AIJ, 2659 MatSetFromOptions_MPIAIJ, 2660 0, 2661 0, 2662 MatFindZeroDiagonals_MPIAIJ, 2663 /*80*/ 0, 2664 0, 2665 0, 2666 /*83*/ MatLoad_MPIAIJ, 2667 0, 2668 0, 2669 0, 2670 0, 2671 0, 2672 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2673 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2674 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2675 MatPtAP_MPIAIJ_MPIAIJ, 2676 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2677 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2678 0, 2679 0, 2680 0, 2681 0, 2682 /*99*/ 0, 2683 0, 2684 0, 2685 MatConjugate_MPIAIJ, 2686 0, 2687 /*104*/MatSetValuesRow_MPIAIJ, 2688 MatRealPart_MPIAIJ, 2689 MatImaginaryPart_MPIAIJ, 2690 0, 2691 0, 2692 /*109*/0, 2693 0, 2694 MatGetRowMin_MPIAIJ, 2695 0, 2696 MatMissingDiagonal_MPIAIJ, 2697 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2698 0, 2699 MatGetGhosts_MPIAIJ, 2700 0, 2701 0, 2702 /*119*/0, 2703 0, 2704 0, 2705 0, 2706 MatGetMultiProcBlock_MPIAIJ, 2707 /*124*/MatFindNonzeroRows_MPIAIJ, 2708 MatGetColumnNorms_MPIAIJ, 2709 MatInvertBlockDiagonal_MPIAIJ, 2710 0, 2711 
MatGetSubMatricesMPI_MPIAIJ, 2712 /*129*/0, 2713 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2714 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2715 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2716 0, 2717 /*134*/0, 2718 0, 2719 0, 2720 0, 2721 0, 2722 /*139*/MatSetBlockSizes_MPIAIJ, 2723 0, 2724 0, 2725 MatFDColoringSetUp_MPIXAIJ, 2726 MatFindOffBlockDiagonalEntries_MPIAIJ, 2727 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2728 }; 2729 2730 /* ----------------------------------------------------------------------------------------*/ 2731 2732 #undef __FUNCT__ 2733 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2734 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2735 { 2736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2737 PetscErrorCode ierr; 2738 2739 PetscFunctionBegin; 2740 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2741 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2742 PetscFunctionReturn(0); 2743 } 2744 2745 #undef __FUNCT__ 2746 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2747 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2748 { 2749 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2750 PetscErrorCode ierr; 2751 2752 PetscFunctionBegin; 2753 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2754 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2755 PetscFunctionReturn(0); 2756 } 2757 2758 #undef __FUNCT__ 2759 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2760 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2761 { 2762 Mat_MPIAIJ *b; 2763 PetscErrorCode ierr; 2764 2765 PetscFunctionBegin; 2766 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2767 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2768 b = (Mat_MPIAIJ*)B->data; 2769 2770 #if defined(PETSC_USE_CTABLE) 2771 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2772 #else 2773 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2774 #endif 2775 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2776 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2777 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2778 2779 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2780 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2781 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2782 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2783 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2784 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2785 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2786 2787 if (!B->preallocated) { 2788 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2789 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2790 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2791 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2792 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2793 } 2794 2795 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2796 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2797 B->preallocated = PETSC_TRUE; 2798 B->was_assembled = PETSC_FALSE; 2799 B->assembled = PETSC_FALSE;; 2800 PetscFunctionReturn(0); 2801 } 2802 2803 #undef __FUNCT__ 2804 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2805 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2806 { 2807 Mat mat; 2808 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2809 PetscErrorCode ierr; 2810 2811 PetscFunctionBegin; 2812 *newmat = 0; 2813 ierr = 
MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2814 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2815 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2816 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2817 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2818 a = (Mat_MPIAIJ*)mat->data; 2819 2820 mat->factortype = matin->factortype; 2821 mat->assembled = PETSC_TRUE; 2822 mat->insertmode = NOT_SET_VALUES; 2823 mat->preallocated = PETSC_TRUE; 2824 2825 a->size = oldmat->size; 2826 a->rank = oldmat->rank; 2827 a->donotstash = oldmat->donotstash; 2828 a->roworiented = oldmat->roworiented; 2829 a->rowindices = 0; 2830 a->rowvalues = 0; 2831 a->getrowactive = PETSC_FALSE; 2832 2833 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2834 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2835 2836 if (oldmat->colmap) { 2837 #if defined(PETSC_USE_CTABLE) 2838 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2839 #else 2840 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2841 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2842 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2843 #endif 2844 } else a->colmap = 0; 2845 if (oldmat->garray) { 2846 PetscInt len; 2847 len = oldmat->B->cmap->n; 2848 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2849 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2850 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2851 } else a->garray = 0; 2852 2853 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2854 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2855 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2856 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2857 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2858 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2859 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2860 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2861 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2862 *newmat = mat; 2863 PetscFunctionReturn(0); 2864 } 2865 2866 2867 2868 #undef __FUNCT__ 2869 #define __FUNCT__ "MatLoad_MPIAIJ" 2870 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2871 { 2872 PetscScalar *vals,*svals; 2873 MPI_Comm comm; 2874 PetscErrorCode ierr; 2875 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2876 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2877 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2878 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2879 PetscInt cend,cstart,n,*rowners; 2880 int fd; 2881 PetscInt bs = newMat->rmap->bs; 2882 2883 PetscFunctionBegin; 2884 /* force binary viewer to load .info file if it has not yet done so */ 2885 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2886 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2887 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2888 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2889 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2890 if (!rank) { 2891 ierr = 
PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
  if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = -1; /* unused, but compilers complain */

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
2968 nz = procsnz[i]; 2969 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2970 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2971 } 2972 ierr = PetscFree(cols);CHKERRQ(ierr); 2973 } else { 2974 /* determine buffer space needed for message */ 2975 nz = 0; 2976 for (i=0; i<m; i++) { 2977 nz += ourlens[i]; 2978 } 2979 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2980 2981 /* receive message of column indices*/ 2982 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2983 } 2984 2985 /* determine column ownership if matrix is not square */ 2986 if (N != M) { 2987 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2988 else n = newMat->cmap->n; 2989 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2990 cstart = cend - n; 2991 } else { 2992 cstart = rstart; 2993 cend = rend; 2994 n = cend - cstart; 2995 } 2996 2997 /* loop over local rows, determining number of off diagonal entries */ 2998 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2999 jj = 0; 3000 for (i=0; i<m; i++) { 3001 for (j=0; j<ourlens[i]; j++) { 3002 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3003 jj++; 3004 } 3005 } 3006 3007 for (i=0; i<m; i++) { 3008 ourlens[i] -= offlens[i]; 3009 } 3010 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3011 3012 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3013 3014 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3015 3016 for (i=0; i<m; i++) { 3017 ourlens[i] += offlens[i]; 3018 } 3019 3020 if (!rank) { 3021 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3022 3023 /* read in my part of the matrix numerical values */ 3024 nz = procsnz[0]; 3025 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3026 3027 /* insert into matrix */ 3028 jj = rstart; 3029 smycols = mycols; 3030 svals = vals; 3031 for (i=0; i<m; i++) { 3032 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3033 smycols += ourlens[i]; 3034 svals += ourlens[i]; 3035 jj++; 3036 } 3037 3038 /* read in other processors and ship out */ 3039 for (i=1; i<size; i++) { 3040 nz = procsnz[i]; 3041 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3042 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3043 } 3044 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3045 } else { 3046 /* receive numeric values */ 3047 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3048 3049 /* receive message of values*/ 3050 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3051 3052 /* insert into matrix */ 3053 jj = rstart; 3054 smycols = mycols; 3055 svals = vals; 3056 for (i=0; i<m; i++) { 3057 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3058 smycols += ourlens[i]; 3059 svals += ourlens[i]; 3060 jj++; 3061 } 3062 } 3063 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3064 ierr = PetscFree(vals);CHKERRQ(ierr); 3065 ierr = PetscFree(mycols);CHKERRQ(ierr); 3066 ierr = PetscFree(rowners);CHKERRQ(ierr); 3067 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3068 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3069 PetscFunctionReturn(0); 3070 } 3071 3072 #undef __FUNCT__ 3073 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3074 /* TODO: Not scalable because of ISAllGather() unless getting all columns. 
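   (ISAllGather() leaves every process holding the complete set of requested
   column indices, so the memory needed per process grows with the global
   number of selected columns.)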
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    /* check if we are grabbing all columns */
    PetscBool   isstride;
    PetscMPIInt lisstride = 0,gisstride;
    ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
    if (isstride) {
      PetscInt start,len,mstart,mlen;
      ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
      ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
      if (mstart == start && mlen-mstart == len) lisstride = 1;
    }
    ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (gisstride) {
      PetscInt N;
      ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
      ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
      ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
    } else {
      PetscInt cbs;
      ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
      ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
      ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
    }
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ on each
  process, and then the end result assembled by concatenating the local matrices.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().

    Note: This requires a sequential iscol with all indices.
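        The caller, MatGetSubMatrix_MPIAIJ() above, guarantees this by gathering iscol
        with ISAllGather(), or by creating a full stride IS when all columns are requested.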
3129 */ 3130 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3131 { 3132 PetscErrorCode ierr; 3133 PetscMPIInt rank,size; 3134 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3135 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3136 PetscBool allcolumns, colflag; 3137 Mat M,Mreuse; 3138 MatScalar *vwork,*aa; 3139 MPI_Comm comm; 3140 Mat_SeqAIJ *aij; 3141 3142 PetscFunctionBegin; 3143 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3144 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3145 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3146 3147 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3148 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3149 if (colflag && ncol == mat->cmap->N) { 3150 allcolumns = PETSC_TRUE; 3151 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3152 } else { 3153 allcolumns = PETSC_FALSE; 3154 } 3155 if (call == MAT_REUSE_MATRIX) { 3156 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3157 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3158 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3159 } else { 3160 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3161 } 3162 3163 /* 3164 m - number of local rows 3165 n - number of columns (same on all processors) 3166 rstart - first row in new global matrix generated 3167 */ 3168 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3169 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3170 if (call == MAT_INITIAL_MATRIX) { 3171 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3172 ii = aij->i; 3173 jj = aij->j; 3174 3175 /* 3176 Determine the number of non-zeros in the diagonal and off-diagonal 3177 portions of the matrix in order to do correct preallocation 3178 */ 3179 3180 /* first get start and end of "diagonal" columns */ 3181 if (csize == PETSC_DECIDE) { 3182 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3183 if (mglobal == n) { /* square matrix */ 3184 nlocal = m; 3185 } else { 3186 nlocal = n/size + ((n % size) > rank); 3187 } 3188 } else { 3189 nlocal = csize; 3190 } 3191 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3192 rstart = rend - nlocal; 3193 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3194 3195 /* next, compute all the lengths */ 3196 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3197 olens = dlens + m; 3198 for (i=0; i<m; i++) { 3199 jend = ii[i+1] - ii[i]; 3200 olen = 0; 3201 dlen = 0; 3202 for (j=0; j<jend; j++) { 3203 if (*jj < rstart || *jj >= rend) olen++; 3204 else dlen++; 3205 jj++; 3206 } 3207 olens[i] = olen; 3208 dlens[i] = dlen; 3209 } 3210 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3211 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3212 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3213 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3214 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3215 ierr = PetscFree(dlens);CHKERRQ(ierr); 3216 } else { 3217 PetscInt ml,nl; 3218 3219 M = *newmat; 3220 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3221 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same 
size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input is equivalent to a
     row-major ordering, i.e., for the following matrix, the input data expected is
     as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an m x n matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain
   the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
     The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
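
     As an illustrative sketch (not a complete program), a call using local CSR arrays
   i, j, and a laid out as in the example below might look like:

.vb
     Mat A;
     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve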

     The format which is used for the sparse matrix input is equivalent to a
   row-major ordering, i.e., for the following matrix, the input data expected is
   as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJ"
/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure.
The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
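
   As a minimal illustrative sketch (not a complete program), the call on proc0 for the
   perfect preallocation above might look as follows; the other ranks pass their own
   m, n, d_nnz, and o_nnz values:

.vb
     Mat            A;
     const PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};  /* proc0 values from above */
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), MatDestroy(&A) ... */
.ve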
3758 3759 Level: intermediate 3760 3761 .keywords: matrix, aij, compressed row, sparse, parallel 3762 3763 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3764 MATMPIAIJ, MatCreateMPIAIJWithArrays() 3765 @*/ 3766 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3767 { 3768 PetscErrorCode ierr; 3769 PetscMPIInt size; 3770 3771 PetscFunctionBegin; 3772 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3773 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3774 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3775 if (size > 1) { 3776 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3777 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3778 } else { 3779 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3780 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3781 } 3782 PetscFunctionReturn(0); 3783 } 3784 3785 #undef __FUNCT__ 3786 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3787 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3788 { 3789 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3790 PetscBool flg; 3791 PetscErrorCode ierr; 3792 3793 PetscFunctionBegin; 3794 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 3795 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 3796 if (Ad) *Ad = a->A; 3797 if (Ao) *Ao = a->B; 3798 if (colmap) *colmap = a->garray; 3799 PetscFunctionReturn(0); 3800 } 3801 3802 #undef __FUNCT__ 3803 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3804 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3805 { 3806 PetscErrorCode ierr; 3807 PetscInt m,N,i,rstart,nnz,Ii; 3808 PetscInt *indx; 3809 PetscScalar *values; 3810 3811 PetscFunctionBegin; 3812 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3813 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3814 PetscInt *dnz,*onz,sum,bs,cbs; 3815 3816 if (n == PETSC_DECIDE) { 3817 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3818 } 3819 /* Check sum(n) = N */ 3820 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3821 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3822 3823 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3824 rstart -= m; 3825 3826 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3827 for (i=0; i<m; i++) { 3828 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3829 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3830 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3831 } 3832 3833 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3834 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3835 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3836 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3837 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3838 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3839 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3840 } 3841 3842 /* numeric phase */ 3843 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3844 for (i=0; i<m; i++) { 3845 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3846 Ii = i + rstart; 3847 
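    /* Ii is the global row number, in the output matrix, of local row i of inmat */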
ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3848 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3849 } 3850 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3851 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3852 PetscFunctionReturn(0); 3853 } 3854 3855 #undef __FUNCT__ 3856 #define __FUNCT__ "MatFileSplit" 3857 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3858 { 3859 PetscErrorCode ierr; 3860 PetscMPIInt rank; 3861 PetscInt m,N,i,rstart,nnz; 3862 size_t len; 3863 const PetscInt *indx; 3864 PetscViewer out; 3865 char *name; 3866 Mat B; 3867 const PetscScalar *values; 3868 3869 PetscFunctionBegin; 3870 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3871 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3872 /* Should this be the type of the diagonal block of A? */ 3873 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3874 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3875 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3876 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3877 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3878 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3879 for (i=0; i<m; i++) { 3880 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3881 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3882 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3883 } 3884 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3885 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3886 3887 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3888 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3889 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3890 sprintf(name,"%s.%d",outfile,rank); 3891 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3892 ierr = PetscFree(name);CHKERRQ(ierr); 3893 ierr = MatView(B,out);CHKERRQ(ierr); 3894 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3895 ierr = MatDestroy(&B);CHKERRQ(ierr); 3896 PetscFunctionReturn(0); 3897 } 3898 3899 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3900 #undef __FUNCT__ 3901 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3902 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3903 { 3904 PetscErrorCode ierr; 3905 Mat_Merge_SeqsToMPI *merge; 3906 PetscContainer container; 3907 3908 PetscFunctionBegin; 3909 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3910 if (container) { 3911 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3912 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3913 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3914 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3915 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3916 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3917 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3918 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3919 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3920 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3921 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3922 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3923 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3924 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3925 ierr = PetscFree(merge);CHKERRQ(ierr); 3926 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3927 } 3928 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3929 PetscFunctionReturn(0); 3930 } 3931 3932 
#include <../src/mat/utils/freespace.h> 3933 #include <petscbt.h> 3934 3935 #undef __FUNCT__ 3936 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3937 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3938 { 3939 PetscErrorCode ierr; 3940 MPI_Comm comm; 3941 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3942 PetscMPIInt size,rank,taga,*len_s; 3943 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3944 PetscInt proc,m; 3945 PetscInt **buf_ri,**buf_rj; 3946 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3947 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3948 MPI_Request *s_waits,*r_waits; 3949 MPI_Status *status; 3950 MatScalar *aa=a->a; 3951 MatScalar **abuf_r,*ba_i; 3952 Mat_Merge_SeqsToMPI *merge; 3953 PetscContainer container; 3954 3955 PetscFunctionBegin; 3956 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3957 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3958 3959 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3960 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3961 3962 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3963 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3964 3965 bi = merge->bi; 3966 bj = merge->bj; 3967 buf_ri = merge->buf_ri; 3968 buf_rj = merge->buf_rj; 3969 3970 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 3971 owners = merge->rowmap->range; 3972 len_s = merge->len_s; 3973 3974 /* send and recv matrix values */ 3975 /*-----------------------------*/ 3976 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3977 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3978 3979 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 3980 for (proc=0,k=0; proc<size; proc++) { 3981 if (!len_s[proc]) continue; 3982 i = owners[proc]; 3983 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3984 k++; 3985 } 3986 3987 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3988 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3989 ierr = PetscFree(status);CHKERRQ(ierr); 3990 3991 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3992 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3993 3994 /* insert mat values of mpimat */ 3995 /*----------------------------*/ 3996 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 3997 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 3998 3999 for (k=0; k<merge->nrecv; k++) { 4000 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4001 nrows = *(buf_ri_k[k]); 4002 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4003 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4004 } 4005 4006 /* set values of ba */ 4007 m = merge->rowmap->n; 4008 for (i=0; i<m; i++) { 4009 arow = owners[rank] + i; 4010 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4011 bnzi = bi[i+1] - bi[i]; 4012 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4013 4014 /* add local non-zero vals of this proc's seqmat into ba */ 4015 anzi = ai[arow+1] - ai[arow]; 4016 aj = a->j + ai[arow]; 4017 aa = a->a + ai[arow]; 4018 nextaj = 0; 4019 for (j=0; nextaj<anzi; j++) { 4020 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4021 ba_i[j] += aa[nextaj++]; 4022 } 4023 } 4024 4025 /* add received 
vals into ba */ 4026 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4027 /* i-th row */ 4028 if (i == *nextrow[k]) { 4029 anzi = *(nextai[k]+1) - *nextai[k]; 4030 aj = buf_rj[k] + *(nextai[k]); 4031 aa = abuf_r[k] + *(nextai[k]); 4032 nextaj = 0; 4033 for (j=0; nextaj<anzi; j++) { 4034 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4035 ba_i[j] += aa[nextaj++]; 4036 } 4037 } 4038 nextrow[k]++; nextai[k]++; 4039 } 4040 } 4041 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4042 } 4043 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4044 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4045 4046 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4047 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4048 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4049 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4050 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4051 PetscFunctionReturn(0); 4052 } 4053 4054 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4055 4056 #undef __FUNCT__ 4057 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4058 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4059 { 4060 PetscErrorCode ierr; 4061 Mat B_mpi; 4062 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4063 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4064 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4065 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4066 PetscInt len,proc,*dnz,*onz,bs,cbs; 4067 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4068 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4069 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4070 MPI_Status *status; 4071 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4072 PetscBT lnkbt; 4073 Mat_Merge_SeqsToMPI *merge; 4074 PetscContainer container; 4075 4076 PetscFunctionBegin; 4077 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4078 4079 /* make sure it is a PETSc comm */ 4080 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4081 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4082 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4083 4084 ierr = PetscNew(&merge);CHKERRQ(ierr); 4085 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4086 4087 /* determine row ownership */ 4088 /*---------------------------------------------------------*/ 4089 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4090 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4091 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4092 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4093 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4094 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4095 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4096 4097 m = merge->rowmap->n; 4098 owners = merge->rowmap->range; 4099 4100 /* determine the number of messages to send, their lengths */ 4101 /*---------------------------------------------------------*/ 4102 len_s = merge->len_s; 4103 4104 len = 0; /* length of buf_si[] */ 4105 merge->nsend = 0; 4106 for (proc=0; proc<size; proc++) { 4107 len_si[proc] = 0; 4108 if (proc == rank) { 4109 len_s[proc] = 0; 4110 } else { 4111 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4112 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4113 } 4114 if (len_s[proc]) { 4115 merge->nsend++; 4116 nrows = 0; 4117 for 
(i=owners[proc]; i<owners[proc+1]; i++) { 4118 if (ai[i+1] > ai[i]) nrows++; 4119 } 4120 len_si[proc] = 2*(nrows+1); 4121 len += len_si[proc]; 4122 } 4123 } 4124 4125 /* determine the number and length of messages to receive for ij-structure */ 4126 /*-------------------------------------------------------------------------*/ 4127 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4128 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4129 4130 /* post the Irecv of j-structure */ 4131 /*-------------------------------*/ 4132 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4133 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4134 4135 /* post the Isend of j-structure */ 4136 /*--------------------------------*/ 4137 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4138 4139 for (proc=0, k=0; proc<size; proc++) { 4140 if (!len_s[proc]) continue; 4141 i = owners[proc]; 4142 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4143 k++; 4144 } 4145 4146 /* receives and sends of j-structure are complete */ 4147 /*------------------------------------------------*/ 4148 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4149 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4150 4151 /* send and recv i-structure */ 4152 /*---------------------------*/ 4153 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4154 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4155 4156 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4157 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4158 for (proc=0,k=0; proc<size; proc++) { 4159 if (!len_s[proc]) continue; 4160 /* form outgoing message for i-structure: 4161 buf_si[0]: nrows to be sent 4162 [1:nrows]: row index (global) 4163 [nrows+1:2*nrows+1]: i-structure index 4164 */ 4165 /*-------------------------------------------*/ 4166 nrows = len_si[proc]/2 - 1; 4167 buf_si_i = buf_si + nrows+1; 4168 buf_si[0] = nrows; 4169 buf_si_i[0] = 0; 4170 nrows = 0; 4171 for (i=owners[proc]; i<owners[proc+1]; i++) { 4172 anzi = ai[i+1] - ai[i]; 4173 if (anzi) { 4174 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4175 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4176 nrows++; 4177 } 4178 } 4179 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4180 k++; 4181 buf_si += len_si[proc]; 4182 } 4183 4184 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4185 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4186 4187 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4188 for (i=0; i<merge->nrecv; i++) { 4189 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4190 } 4191 4192 ierr = PetscFree(len_si);CHKERRQ(ierr); 4193 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4194 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4195 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4196 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4197 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4198 ierr = PetscFree(status);CHKERRQ(ierr); 4199 4200 /* compute a local seq matrix in each processor */ 4201 
/*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready
for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4286 B_mpi->assembled = PETSC_FALSE; 4287 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4288 merge->bi = bi; 4289 merge->bj = bj; 4290 merge->buf_ri = buf_ri; 4291 merge->buf_rj = buf_rj; 4292 merge->coi = NULL; 4293 merge->coj = NULL; 4294 merge->owners_co = NULL; 4295 4296 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4297 4298 /* attach the supporting struct to B_mpi for reuse */ 4299 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4300 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4301 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4302 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4303 *mpimat = B_mpi; 4304 4305 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4306 PetscFunctionReturn(0); 4307 } 4308 4309 #undef __FUNCT__ 4310 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4311 /*@C 4312 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4313 matrices from each processor 4314 4315 Collective on MPI_Comm 4316 4317 Input Parameters: 4318 + comm - the communicators the parallel matrix will live on 4319 . seqmat - the input sequential matrices 4320 . m - number of local rows (or PETSC_DECIDE) 4321 . n - number of local columns (or PETSC_DECIDE) 4322 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4323 4324 Output Parameter: 4325 . mpimat - the parallel matrix generated 4326 4327 Level: advanced 4328 4329 Notes: 4330 The dimensions of the sequential matrix in each processor MUST be the same. 4331 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4332 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4333 @*/ 4334 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4335 { 4336 PetscErrorCode ierr; 4337 PetscMPIInt size; 4338 4339 PetscFunctionBegin; 4340 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4341 if (size == 1) { 4342 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4343 if (scall == MAT_INITIAL_MATRIX) { 4344 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4345 } else { 4346 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4347 } 4348 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4349 PetscFunctionReturn(0); 4350 } 4351 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4352 if (scall == MAT_INITIAL_MATRIX) { 4353 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4354 } 4355 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4356 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4357 PetscFunctionReturn(0); 4358 } 4359 4360 #undef __FUNCT__ 4361 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4362 /*@ 4363 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4364 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4365 with MatGetSize() 4366 4367 Not Collective 4368 4369 Input Parameters: 4370 + A - the matrix 4371 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4372 4373 Output Parameter: 4374 . 
A_loc - the local sequential matrix generated 4375 4376 Level: developer 4377 4378 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4379 4380 @*/ 4381 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4382 { 4383 PetscErrorCode ierr; 4384 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4385 Mat_SeqAIJ *mat,*a,*b; 4386 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4387 MatScalar *aa,*ba,*cam; 4388 PetscScalar *ca; 4389 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4390 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4391 PetscBool match; 4392 MPI_Comm comm; 4393 PetscMPIInt size; 4394 4395 PetscFunctionBegin; 4396 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4397 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4398 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4399 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4400 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4401 4402 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4403 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4404 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4405 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4406 aa = a->a; ba = b->a; 4407 if (scall == MAT_INITIAL_MATRIX) { 4408 if (size == 1) { 4409 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4410 PetscFunctionReturn(0); 4411 } 4412 4413 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4414 ci[0] = 0; 4415 for (i=0; i<am; i++) { 4416 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4417 } 4418 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4419 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4420 k = 0; 4421 for (i=0; i<am; i++) { 4422 ncols_o = bi[i+1] - bi[i]; 4423 ncols_d = ai[i+1] - ai[i]; 4424 /* off-diagonal portion of A */ 4425 for (jo=0; jo<ncols_o; jo++) { 4426 col = cmap[*bj]; 4427 if (col >= cstart) break; 4428 cj[k] = col; bj++; 4429 ca[k++] = *ba++; 4430 } 4431 /* diagonal portion of A */ 4432 for (j=0; j<ncols_d; j++) { 4433 cj[k] = cstart + *aj++; 4434 ca[k++] = *aa++; 4435 } 4436 /* off-diagonal portion of A */ 4437 for (j=jo; j<ncols_o; j++) { 4438 cj[k] = cmap[*bj++]; 4439 ca[k++] = *ba++; 4440 } 4441 } 4442 /* put together the new matrix */ 4443 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4444 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4445 /* Since these are PETSc arrays, change flags to free them as necessary. 
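     Setting free_a and free_ij below hands ownership of ci, cj, and ca to the
     SeqAIJ matrix, so those arrays are released when the matrix is destroyed.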
*/ 4446 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4447 mat->free_a = PETSC_TRUE; 4448 mat->free_ij = PETSC_TRUE; 4449 mat->nonew = 0; 4450 } else if (scall == MAT_REUSE_MATRIX) { 4451 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4452 ci = mat->i; cj = mat->j; cam = mat->a; 4453 for (i=0; i<am; i++) { 4454 /* off-diagonal portion of A */ 4455 ncols_o = bi[i+1] - bi[i]; 4456 for (jo=0; jo<ncols_o; jo++) { 4457 col = cmap[*bj]; 4458 if (col >= cstart) break; 4459 *cam++ = *ba++; bj++; 4460 } 4461 /* diagonal portion of A */ 4462 ncols_d = ai[i+1] - ai[i]; 4463 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4464 /* off-diagonal portion of A */ 4465 for (j=jo; j<ncols_o; j++) { 4466 *cam++ = *ba++; bj++; 4467 } 4468 } 4469 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4470 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4471 PetscFunctionReturn(0); 4472 } 4473 4474 #undef __FUNCT__ 4475 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4476 /*@C 4477 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4478 4479 Not Collective 4480 4481 Input Parameters: 4482 + A - the matrix 4483 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4484 - row, col - index sets of rows and columns to extract (or NULL) 4485 4486 Output Parameter: 4487 . A_loc - the local sequential matrix generated 4488 4489 Level: developer 4490 4491 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4492 4493 @*/ 4494 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4495 { 4496 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4497 PetscErrorCode ierr; 4498 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4499 IS isrowa,iscola; 4500 Mat *aloc; 4501 PetscBool match; 4502 4503 PetscFunctionBegin; 4504 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4505 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4506 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4507 if (!row) { 4508 start = A->rmap->rstart; end = A->rmap->rend; 4509 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4510 } else { 4511 isrowa = *row; 4512 } 4513 if (!col) { 4514 start = A->cmap->rstart; 4515 cmap = a->garray; 4516 nzA = a->A->cmap->n; 4517 nzB = a->B->cmap->n; 4518 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4519 ncols = 0; 4520 for (i=0; i<nzB; i++) { 4521 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4522 else break; 4523 } 4524 imark = i; 4525 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4526 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4527 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4528 } else { 4529 iscola = *col; 4530 } 4531 if (scall != MAT_INITIAL_MATRIX) { 4532 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4533 aloc[0] = *A_loc; 4534 } 4535 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4536 *A_loc = aloc[0]; 4537 ierr = PetscFree(aloc);CHKERRQ(ierr); 4538 if (!row) { 4539 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4540 } 4541 if (!col) { 4542 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4543 } 4544 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4545 PetscFunctionReturn(0); 4546 } 4547 4548 #undef __FUNCT__ 4549 #define __FUNCT__ "MatGetBrowsOfAcols" 4550 /*@C 4551 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to 
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to
    the nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B that were extracted
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq = NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB,&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
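
/*
   A minimal usage sketch for MatGetBrowsOfAcols(), assuming compatible parallel
   matrices A and B of type MATMPIAIJ (the names are placeholders): the index sets
   are created on the first call and handed back so they can be reused, together
   with B_seq, when only the numerical values of B change.

     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     (later, after the values of B change)
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/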
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to
    the nonzero columns of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx = a->Mvctx;
  MPI_Comm               comm;
  PetscMPIInt            *rprocs,*sprocs,tag = ((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn = a->B->cmap->n,row,*b_othi,*b_othj;
  PetscInt               *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = NULL,*swaits = NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj,size;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices; /* local row indices to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /* post receives */
    ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* number of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing messages */
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of the j or a array to be sent */
    k           = 0;
    ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
    for (i=0; i<nsends; i++) {
      rowlen = svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* number of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row index */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* row length */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len; /* starting point of the (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
    ierr = PetscFree(svalues);CHKERRQ(ierr);

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of the j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* number of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
        k++;
      }
      rstartsj[i+1] = len; /* starting point of the (i+1)-th incoming msg in bufj and bufa */
    }
    ierr = PetscFree(rvalues);CHKERRQ(ierr);

    /* allocate space for the j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing j-array messages */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* number of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row index */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Only MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX are supported");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing a-array messages */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* number of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row index */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays() flags the matrix so that PETSc does not free the user's arrays; */
    /* since these are PETSc arrays, change the flags so they are freed as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
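
/*
   A sketch of the intended caller pattern for MatGetBrowsOfAoCols_MPIAIJ() (a
   private routine; the variable names are placeholders): a matrix-product kernel
   keeps the communication buffers across calls so that only the numerical values
   travel when the sparsity pattern is unchanged.

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth = NULL;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     (numeric phase: the values of B change, the structure does not)
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/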
#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
    MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

    Not Collective

   Input Parameter:
.    A - The matrix in mpiaij format

   Output Parameters:
+    lvec - The local vector holding off-process values from the argument to a matrix-vector product
.    colmap - A map from global column index to local index into lvec
-    multScatter - A scatter from the argument of a matrix-vector product to lvec

    Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing A*B directly is untenable

               n                       p                          p
        (              )       (              )         (                  )
      m (      A       )  *  n (       B      )   =   m (         C       )
        (              )       (              )         (                  )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,n = B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}
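
/*
   A minimal usage sketch for the dense-times-sparse product above, assuming A has
   type MATMPIDENSE and B has type MATMPIAIJ (the names are placeholders): a plain
   MatMatMult() dispatches to the symbolic/numeric routines registered with the
   matrix class below.

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     (reuse the product after the values of A or B change)
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/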
/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateAIJ()
M*/
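
/*
   A minimal creation sketch for a MATMPIAIJ matrix (M, N and the preallocation
   counts of 5 diagonal-block and 2 off-diagonal-block nonzeros per row are
   placeholder assumptions):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     (set values with MatSetValues(), then MatAssemblyBegin/End())
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/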
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off-processor array entries */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix-vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
     and "off-diagonal" parts of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for the "diagonal" portion of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for the "off-diagonal" portion of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0-based

       See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* the user's arrays directly back the "diagonal" block A and "off-diagonal" block B */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
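
/*
   A minimal usage sketch for MatCreateMPIAIJWithSplitArrays(): rank 0 of a two-rank
   communicator assembling its two rows of the global 4x4 tridiagonal matrix with 2
   on the diagonal and -1 off it (rank 1 is analogous; the arrays are placeholder
   assumptions and must outlive the matrix). The j indices of the "diagonal" block
   are local to the block, while the oj indices are global.

     PetscInt    i[]  = {0,2,4},j[] = {0,1,0,1};
     PetscScalar a[]  = {2,-1,-1,2};
     PetscInt    oi[] = {0,0,1},oj[] = {2};
     PetscScalar oa[] = {-1};
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/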
/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in void functions */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#undef __FUNCT__
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#define __FUNCT__ "MATSETVALUESMPIAIJ"
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#define __FUNCT__ "matsetvaluesmpiaij"
#else
#define __FUNCT__ "matsetvaluesmpiaij_"
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}