#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
   enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
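/*
   Illustrative sketch only (not part of PETSc itself): the usage pattern recommended by the
   manual pages above, calling both preallocation routines so the same code works whether the
   communicator has one process (MATSEQAIJ) or several (MATMPIAIJ).  The function name and the
   per-row nonzero estimates are assumptions chosen for the example.
*/
static PetscErrorCode ExampleCreateAIJWithPreallocation(MPI_Comm comm,PetscInt mlocal,PetscInt nlocal,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            A;

  PetscFunctionBegin;
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);                         /* resolves to MATSEQAIJ or MATMPIAIJ */
  ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);          /* used when the communicator has one process */
  ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   /* used when it has more than one */
  *newmat = A;
  PetscFunctionReturn(0);
}
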
#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSizes_MPIAIJ"
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia = a->i;
  ib = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
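
/*
   Illustrative sketch only (not part of PETSc itself): calling the column-norm routine above
   through the public interface.  Every process receives the norms of all global columns, so the
   output array has global length; the function and variable names are assumptions for the example.
*/
static PetscErrorCode ExampleColumnNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       N;
  PetscReal      *norms;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);       /* number of global columns */
  ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
  ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
  ierr = PetscFree(norms);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}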

#undef __FUNCT__
#define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number of diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array, but it is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
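
/*
   Illustrative sketch only (assumption, not PETSc code): how the colmap built above is consulted
   to translate a global column index gcol into a local column index of the off-diagonal block B.
   A negative result after the shift means the column is not present in B.  This mirrors the
   lookups performed by MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below.
*/
static PetscErrorCode ExampleColmapLookup(Mat mat,PetscInt gcol,PetscInt *lcol)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  {
    PetscErrorCode ierr;
    ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr); /* entries are stored shifted by one */
    (*lcol)--;
  }
#else
  *lcol = aij->colmap[gcol] - 1;  /* dense array of global length; 0 means "not present" before the shift */
#endif
  PetscFunctionReturn(0);
}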

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat        A     = aij->A;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa   = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B     = aij->B;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba   = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       *lrows;
  PetscInt       r, len;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
    PetscBool cong;
    ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
    if (cong) A->congruentlayouts = 1;
    else      A->congruentlayouts = 0;
  }
  if ((diag != 0.0) && A->congruentlayouts) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* on process 0 this must hold the largest number of nonzeros any one process has */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
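
/*
   Illustrative sketch only (not part of PETSc itself): viewing an MPIAIJ matrix with the
   detailed ASCII format handled by MatView_MPIAIJ_ASCIIorDraworSocket() above.  The viewer
   push/pop calls are standard PETSc viewer API; the function name is an assumption for the
   example.
*/
static PetscErrorCode ExampleViewAIJInfoDetail(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)A));
  ierr   = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
  ierr   = MatView(A,viewer);CHKERRQ(ierr);   /* dispatches to MatView_MPIAIJ() above */
  ierr   = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}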
1467 { 1468 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1469 PetscErrorCode ierr; 1470 Vec bb1 = 0; 1471 PetscBool hasop; 1472 1473 PetscFunctionBegin; 1474 if (flag == SOR_APPLY_UPPER) { 1475 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1476 PetscFunctionReturn(0); 1477 } 1478 1479 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1480 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1481 } 1482 1483 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1484 if (flag & SOR_ZERO_INITIAL_GUESS) { 1485 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1486 its--; 1487 } 1488 1489 while (its--) { 1490 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1491 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1492 1493 /* update rhs: bb1 = bb - B*x */ 1494 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1495 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1496 1497 /* local sweep */ 1498 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1499 } 1500 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1501 if (flag & SOR_ZERO_INITIAL_GUESS) { 1502 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1503 its--; 1504 } 1505 while (its--) { 1506 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1507 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1508 1509 /* update rhs: bb1 = bb - B*x */ 1510 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1511 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1512 1513 /* local sweep */ 1514 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1515 } 1516 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1517 if (flag & SOR_ZERO_INITIAL_GUESS) { 1518 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1519 its--; 1520 } 1521 while (its--) { 1522 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1523 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1524 1525 /* update rhs: bb1 = bb - B*x */ 1526 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1527 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1528 1529 /* local sweep */ 1530 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1531 } 1532 } else if (flag & SOR_EISENSTAT) { 1533 Vec xx1; 1534 1535 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1536 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1537 1538 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1539 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1540 if (!mat->diag) { 1541 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1542 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1543 } 1544 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1545 if (hasop) { 1546 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1547 } else { 1548 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1549 } 1550 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 
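    /* bb1 now holds bb + ((omega-2)/omega) * D * x, with D the local (block)
       diagonal; the MatMultAdd() below adds the off-process contribution
       B*lvec, forming the right-hand side for the local forward sweep used in
       Eisenstat's method. */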
1551 1552 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1553 1554 /* local sweep */ 1555 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1556 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1557 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1558 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1559 1560 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1561 1562 matin->factorerrortype = mat->A->factorerrortype; 1563 PetscFunctionReturn(0); 1564 } 1565 1566 #undef __FUNCT__ 1567 #define __FUNCT__ "MatPermute_MPIAIJ" 1568 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1569 { 1570 Mat aA,aB,Aperm; 1571 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1572 PetscScalar *aa,*ba; 1573 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1574 PetscSF rowsf,sf; 1575 IS parcolp = NULL; 1576 PetscBool done; 1577 PetscErrorCode ierr; 1578 1579 PetscFunctionBegin; 1580 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1581 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1582 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1583 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1584 1585 /* Invert row permutation to find out where my rows should go */ 1586 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1587 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1588 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1589 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1590 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1591 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1592 1593 /* Invert column permutation to find out where my columns should go */ 1594 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1595 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1596 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1597 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1598 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1599 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1600 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1601 1602 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1603 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1604 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1605 1606 /* Find out where my gcols should go */ 1607 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1608 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1609 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1610 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1611 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1612 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1613 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1614 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1615 1616 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1617 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1618 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1619 for (i=0; i<m; i++) { 1620 PetscInt row = rdest[i],rowner; 1621 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1622 for 
(j=ai[i]; j<ai[i+1]; j++) { 1623 PetscInt cowner,col = cdest[aj[j]]; 1624 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1625 if (rowner == cowner) dnnz[i]++; 1626 else onnz[i]++; 1627 } 1628 for (j=bi[i]; j<bi[i+1]; j++) { 1629 PetscInt cowner,col = gcdest[bj[j]]; 1630 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1631 if (rowner == cowner) dnnz[i]++; 1632 else onnz[i]++; 1633 } 1634 } 1635 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1636 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1637 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1638 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1639 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1640 1641 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1642 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1643 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1644 for (i=0; i<m; i++) { 1645 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1646 PetscInt j0,rowlen; 1647 rowlen = ai[i+1] - ai[i]; 1648 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1649 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1650 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1651 } 1652 rowlen = bi[i+1] - bi[i]; 1653 for (j0=j=0; j<rowlen; j0=j) { 1654 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1655 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1656 } 1657 } 1658 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1659 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1660 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1661 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1662 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1663 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1664 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1665 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1666 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1667 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1668 *B = Aperm; 1669 PetscFunctionReturn(0); 1670 } 1671 1672 #undef __FUNCT__ 1673 #define __FUNCT__ "MatGetGhosts_MPIAIJ" 1674 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1675 { 1676 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1677 PetscErrorCode ierr; 1678 1679 PetscFunctionBegin; 1680 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1681 if (ghosts) *ghosts = aij->garray; 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1687 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1688 { 1689 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1690 Mat A = mat->A,B = mat->B; 1691 PetscErrorCode ierr; 1692 PetscReal isend[5],irecv[5]; 1693 1694 PetscFunctionBegin; 1695 info->block_size = 1.0; 1696 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1697 1698 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1699 isend[3] = info->memory; isend[4] = info->mallocs; 1700 1701 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1702 1703 isend[0] += 
info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1704 isend[3] += info->memory; isend[4] += info->mallocs; 1705 if (flag == MAT_LOCAL) { 1706 info->nz_used = isend[0]; 1707 info->nz_allocated = isend[1]; 1708 info->nz_unneeded = isend[2]; 1709 info->memory = isend[3]; 1710 info->mallocs = isend[4]; 1711 } else if (flag == MAT_GLOBAL_MAX) { 1712 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1713 1714 info->nz_used = irecv[0]; 1715 info->nz_allocated = irecv[1]; 1716 info->nz_unneeded = irecv[2]; 1717 info->memory = irecv[3]; 1718 info->mallocs = irecv[4]; 1719 } else if (flag == MAT_GLOBAL_SUM) { 1720 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1721 1722 info->nz_used = irecv[0]; 1723 info->nz_allocated = irecv[1]; 1724 info->nz_unneeded = irecv[2]; 1725 info->memory = irecv[3]; 1726 info->mallocs = irecv[4]; 1727 } 1728 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1729 info->fill_ratio_needed = 0; 1730 info->factor_mallocs = 0; 1731 PetscFunctionReturn(0); 1732 } 1733 1734 #undef __FUNCT__ 1735 #define __FUNCT__ "MatSetOption_MPIAIJ" 1736 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1737 { 1738 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1739 PetscErrorCode ierr; 1740 1741 PetscFunctionBegin; 1742 switch (op) { 1743 case MAT_NEW_NONZERO_LOCATIONS: 1744 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1745 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1746 case MAT_KEEP_NONZERO_PATTERN: 1747 case MAT_NEW_NONZERO_LOCATION_ERR: 1748 case MAT_USE_INODES: 1749 case MAT_IGNORE_ZERO_ENTRIES: 1750 MatCheckPreallocated(A,1); 1751 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1752 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1753 break; 1754 case MAT_ROW_ORIENTED: 1755 MatCheckPreallocated(A,1); 1756 a->roworiented = flg; 1757 1758 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1759 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1760 break; 1761 case MAT_NEW_DIAGONALS: 1762 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1763 break; 1764 case MAT_IGNORE_OFF_PROC_ENTRIES: 1765 a->donotstash = flg; 1766 break; 1767 case MAT_SPD: 1768 A->spd_set = PETSC_TRUE; 1769 A->spd = flg; 1770 if (flg) { 1771 A->symmetric = PETSC_TRUE; 1772 A->structurally_symmetric = PETSC_TRUE; 1773 A->symmetric_set = PETSC_TRUE; 1774 A->structurally_symmetric_set = PETSC_TRUE; 1775 } 1776 break; 1777 case MAT_SYMMETRIC: 1778 MatCheckPreallocated(A,1); 1779 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1780 break; 1781 case MAT_STRUCTURALLY_SYMMETRIC: 1782 MatCheckPreallocated(A,1); 1783 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1784 break; 1785 case MAT_HERMITIAN: 1786 MatCheckPreallocated(A,1); 1787 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1788 break; 1789 case MAT_SYMMETRY_ETERNAL: 1790 MatCheckPreallocated(A,1); 1791 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1792 break; 1793 case MAT_SUBMAT_SINGLEIS: 1794 A->submat_singleis = flg; 1795 break; 1796 default: 1797 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1798 } 1799 PetscFunctionReturn(0); 1800 } 1801 1802 #undef __FUNCT__ 1803 #define __FUNCT__ "MatGetRow_MPIAIJ" 1804 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1805 { 1806 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1807 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1808 PetscErrorCode ierr; 1809 PetscInt 
i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1810 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1811 PetscInt *cmap,*idx_p; 1812 1813 PetscFunctionBegin; 1814 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1815 mat->getrowactive = PETSC_TRUE; 1816 1817 if (!mat->rowvalues && (idx || v)) { 1818 /* 1819 allocate enough space to hold information from the longest row. 1820 */ 1821 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1822 PetscInt max = 1,tmp; 1823 for (i=0; i<matin->rmap->n; i++) { 1824 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1825 if (max < tmp) max = tmp; 1826 } 1827 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1828 } 1829 1830 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1831 lrow = row - rstart; 1832 1833 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1834 if (!v) {pvA = 0; pvB = 0;} 1835 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1836 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1837 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1838 nztot = nzA + nzB; 1839 1840 cmap = mat->garray; 1841 if (v || idx) { 1842 if (nztot) { 1843 /* Sort by increasing column numbers, assuming A and B already sorted */ 1844 PetscInt imark = -1; 1845 if (v) { 1846 *v = v_p = mat->rowvalues; 1847 for (i=0; i<nzB; i++) { 1848 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1849 else break; 1850 } 1851 imark = i; 1852 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1853 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1854 } 1855 if (idx) { 1856 *idx = idx_p = mat->rowindices; 1857 if (imark > -1) { 1858 for (i=0; i<imark; i++) { 1859 idx_p[i] = cmap[cworkB[i]]; 1860 } 1861 } else { 1862 for (i=0; i<nzB; i++) { 1863 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1864 else break; 1865 } 1866 imark = i; 1867 } 1868 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1869 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1870 } 1871 } else { 1872 if (idx) *idx = 0; 1873 if (v) *v = 0; 1874 } 1875 } 1876 *nz = nztot; 1877 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1878 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1879 PetscFunctionReturn(0); 1880 } 1881 1882 #undef __FUNCT__ 1883 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1884 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1885 { 1886 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1887 1888 PetscFunctionBegin; 1889 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1890 aij->getrowactive = PETSC_FALSE; 1891 PetscFunctionReturn(0); 1892 } 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "MatNorm_MPIAIJ" 1896 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1897 { 1898 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1899 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1900 PetscErrorCode ierr; 1901 PetscInt i,j,cstart = mat->cmap->rstart; 1902 PetscReal sum = 0.0; 1903 MatScalar *v; 1904 1905 PetscFunctionBegin; 1906 if (aij->size == 1) { 1907 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1908 } else { 1909 if (type == NORM_FROBENIUS) { 1910 v = amat->a; 1911 for (i=0; i<amat->nz; i++) { 1912 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 
1913 } 1914 v = bmat->a; 1915 for (i=0; i<bmat->nz; i++) { 1916 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1917 } 1918 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1919 *norm = PetscSqrtReal(*norm); 1920 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1921 } else if (type == NORM_1) { /* max column norm */ 1922 PetscReal *tmp,*tmp2; 1923 PetscInt *jj,*garray = aij->garray; 1924 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1925 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1926 *norm = 0.0; 1927 v = amat->a; jj = amat->j; 1928 for (j=0; j<amat->nz; j++) { 1929 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1930 } 1931 v = bmat->a; jj = bmat->j; 1932 for (j=0; j<bmat->nz; j++) { 1933 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1934 } 1935 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1936 for (j=0; j<mat->cmap->N; j++) { 1937 if (tmp2[j] > *norm) *norm = tmp2[j]; 1938 } 1939 ierr = PetscFree(tmp);CHKERRQ(ierr); 1940 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1941 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1942 } else if (type == NORM_INFINITY) { /* max row norm */ 1943 PetscReal ntemp = 0.0; 1944 for (j=0; j<aij->A->rmap->n; j++) { 1945 v = amat->a + amat->i[j]; 1946 sum = 0.0; 1947 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1948 sum += PetscAbsScalar(*v); v++; 1949 } 1950 v = bmat->a + bmat->i[j]; 1951 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1952 sum += PetscAbsScalar(*v); v++; 1953 } 1954 if (sum > ntemp) ntemp = sum; 1955 } 1956 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1957 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1958 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1959 } 1960 PetscFunctionReturn(0); 1961 } 1962 1963 #undef __FUNCT__ 1964 #define __FUNCT__ "MatTranspose_MPIAIJ" 1965 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1966 { 1967 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1968 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1969 PetscErrorCode ierr; 1970 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1971 PetscInt cstart = A->cmap->rstart,ncol; 1972 Mat B; 1973 MatScalar *array; 1974 1975 PetscFunctionBegin; 1976 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1977 1978 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1979 ai = Aloc->i; aj = Aloc->j; 1980 bi = Bloc->i; bj = Bloc->j; 1981 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1982 PetscInt *d_nnz,*g_nnz,*o_nnz; 1983 PetscSFNode *oloc; 1984 PETSC_UNUSED PetscSF sf; 1985 1986 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1987 /* compute d_nnz for preallocation */ 1988 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1989 for (i=0; i<ai[ma]; i++) { 1990 d_nnz[aj[i]]++; 1991 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1992 } 1993 /* compute local off-diagonal contributions */ 1994 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1995 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1996 /* map those to global */ 1997 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1998 ierr = 
PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1999 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2000 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2001 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2002 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2003 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2004 2005 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2006 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2007 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2008 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2009 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2010 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2011 } else { 2012 B = *matout; 2013 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2014 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2015 } 2016 2017 /* copy over the A part */ 2018 array = Aloc->a; 2019 row = A->rmap->rstart; 2020 for (i=0; i<ma; i++) { 2021 ncol = ai[i+1]-ai[i]; 2022 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2023 row++; 2024 array += ncol; aj += ncol; 2025 } 2026 aj = Aloc->j; 2027 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2028 2029 /* copy over the B part */ 2030 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2031 array = Bloc->a; 2032 row = A->rmap->rstart; 2033 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2034 cols_tmp = cols; 2035 for (i=0; i<mb; i++) { 2036 ncol = bi[i+1]-bi[i]; 2037 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2038 row++; 2039 array += ncol; cols_tmp += ncol; 2040 } 2041 ierr = PetscFree(cols);CHKERRQ(ierr); 2042 2043 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2044 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2045 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2046 *matout = B; 2047 } else { 2048 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2049 } 2050 PetscFunctionReturn(0); 2051 } 2052 2053 #undef __FUNCT__ 2054 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2055 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2056 { 2057 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2058 Mat a = aij->A,b = aij->B; 2059 PetscErrorCode ierr; 2060 PetscInt s1,s2,s3; 2061 2062 PetscFunctionBegin; 2063 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2064 if (rr) { 2065 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2066 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2067 /* Overlap communication with computation. 
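       The scatter of rr into aij->lvec is started here, the diagonal block is
       scaled while the messages are in flight, and the scatter is completed
       below just before the off-diagonal block is column-scaled with the
       ghosted values.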
*/ 2068 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2069 } 2070 if (ll) { 2071 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2072 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2073 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2074 } 2075 /* scale the diagonal block */ 2076 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2077 2078 if (rr) { 2079 /* Do a scatter end and then right scale the off-diagonal block */ 2080 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2081 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2082 } 2083 PetscFunctionReturn(0); 2084 } 2085 2086 #undef __FUNCT__ 2087 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2088 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2089 { 2090 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2091 PetscErrorCode ierr; 2092 2093 PetscFunctionBegin; 2094 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2095 PetscFunctionReturn(0); 2096 } 2097 2098 #undef __FUNCT__ 2099 #define __FUNCT__ "MatEqual_MPIAIJ" 2100 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2101 { 2102 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2103 Mat a,b,c,d; 2104 PetscBool flg; 2105 PetscErrorCode ierr; 2106 2107 PetscFunctionBegin; 2108 a = matA->A; b = matA->B; 2109 c = matB->A; d = matB->B; 2110 2111 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2112 if (flg) { 2113 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2114 } 2115 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2116 PetscFunctionReturn(0); 2117 } 2118 2119 #undef __FUNCT__ 2120 #define __FUNCT__ "MatCopy_MPIAIJ" 2121 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2122 { 2123 PetscErrorCode ierr; 2124 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2125 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2126 2127 PetscFunctionBegin; 2128 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2129 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2130 /* because of the column compression in the off-processor part of the matrix a->B, 2131 the number of columns in a->B and b->B may be different, hence we cannot call 2132 the MatCopy() directly on the two parts. If need be, we can provide a more 2133 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2134 then copying the submatrices */ 2135 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2136 } else { 2137 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2138 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2139 } 2140 PetscFunctionReturn(0); 2141 } 2142 2143 #undef __FUNCT__ 2144 #define __FUNCT__ "MatSetUp_MPIAIJ" 2145 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2146 { 2147 PetscErrorCode ierr; 2148 2149 PetscFunctionBegin; 2150 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2151 PetscFunctionReturn(0); 2152 } 2153 2154 /* 2155 Computes the number of nonzeros per row needed for preallocation when X and Y 2156 have different nonzero structure. 
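   For each row the two (sorted) column lists are merged in global numbering,
   using the xltog/yltog maps, and nnz[i] is the size of the union. For
   example, if row i of X has global columns {0,3,5} and row i of Y has {3,4},
   then nnz[i] = 4.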
2157 */ 2158 #undef __FUNCT__ 2159 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2160 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2161 { 2162 PetscInt i,j,k,nzx,nzy; 2163 2164 PetscFunctionBegin; 2165 /* Set the number of nonzeros in the new matrix */ 2166 for (i=0; i<m; i++) { 2167 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2168 nzx = xi[i+1] - xi[i]; 2169 nzy = yi[i+1] - yi[i]; 2170 nnz[i] = 0; 2171 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2172 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2173 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2174 nnz[i]++; 2175 } 2176 for (; k<nzy; k++) nnz[i]++; 2177 } 2178 PetscFunctionReturn(0); 2179 } 2180 2181 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2182 #undef __FUNCT__ 2183 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2184 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2185 { 2186 PetscErrorCode ierr; 2187 PetscInt m = Y->rmap->N; 2188 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2189 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2190 2191 PetscFunctionBegin; 2192 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2193 PetscFunctionReturn(0); 2194 } 2195 2196 #undef __FUNCT__ 2197 #define __FUNCT__ "MatAXPY_MPIAIJ" 2198 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2199 { 2200 PetscErrorCode ierr; 2201 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2202 PetscBLASInt bnz,one=1; 2203 Mat_SeqAIJ *x,*y; 2204 2205 PetscFunctionBegin; 2206 if (str == SAME_NONZERO_PATTERN) { 2207 PetscScalar alpha = a; 2208 x = (Mat_SeqAIJ*)xx->A->data; 2209 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2210 y = (Mat_SeqAIJ*)yy->A->data; 2211 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2212 x = (Mat_SeqAIJ*)xx->B->data; 2213 y = (Mat_SeqAIJ*)yy->B->data; 2214 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2215 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2216 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2217 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2218 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2219 } else { 2220 Mat B; 2221 PetscInt *nnz_d,*nnz_o; 2222 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2223 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2224 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2225 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2226 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2227 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2228 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2229 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2230 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2231 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2232 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2233 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2234 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2235 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2236 } 2237 
PetscFunctionReturn(0); 2238 } 2239 2240 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2241 2242 #undef __FUNCT__ 2243 #define __FUNCT__ "MatConjugate_MPIAIJ" 2244 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2245 { 2246 #if defined(PETSC_USE_COMPLEX) 2247 PetscErrorCode ierr; 2248 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2249 2250 PetscFunctionBegin; 2251 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2252 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2253 #else 2254 PetscFunctionBegin; 2255 #endif 2256 PetscFunctionReturn(0); 2257 } 2258 2259 #undef __FUNCT__ 2260 #define __FUNCT__ "MatRealPart_MPIAIJ" 2261 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2262 { 2263 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2264 PetscErrorCode ierr; 2265 2266 PetscFunctionBegin; 2267 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2268 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2269 PetscFunctionReturn(0); 2270 } 2271 2272 #undef __FUNCT__ 2273 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2274 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2275 { 2276 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2277 PetscErrorCode ierr; 2278 2279 PetscFunctionBegin; 2280 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2281 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2282 PetscFunctionReturn(0); 2283 } 2284 2285 #undef __FUNCT__ 2286 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2287 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2288 { 2289 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2290 PetscErrorCode ierr; 2291 PetscInt i,*idxb = 0; 2292 PetscScalar *va,*vb; 2293 Vec vtmp; 2294 2295 PetscFunctionBegin; 2296 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2297 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2298 if (idx) { 2299 for (i=0; i<A->rmap->n; i++) { 2300 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2301 } 2302 } 2303 2304 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2305 if (idx) { 2306 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2307 } 2308 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2309 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2310 2311 for (i=0; i<A->rmap->n; i++) { 2312 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2313 va[i] = vb[i]; 2314 if (idx) idx[i] = a->garray[idxb[i]]; 2315 } 2316 } 2317 2318 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2319 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2320 ierr = PetscFree(idxb);CHKERRQ(ierr); 2321 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2322 PetscFunctionReturn(0); 2323 } 2324 2325 #undef __FUNCT__ 2326 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2327 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2328 { 2329 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2330 PetscErrorCode ierr; 2331 PetscInt i,*idxb = 0; 2332 PetscScalar *va,*vb; 2333 Vec vtmp; 2334 2335 PetscFunctionBegin; 2336 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2337 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2338 if (idx) { 2339 for (i=0; i<A->cmap->n; i++) { 2340 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2341 } 2342 } 2343 2344 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2345 if (idx) { 2346 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2347 } 2348 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2349 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2350 2351 for (i=0; i<A->rmap->n; i++) { 2352 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2353 va[i] = vb[i]; 2354 if (idx) idx[i] = a->garray[idxb[i]]; 2355 } 2356 } 2357 2358 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2359 ierr = 
VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2360 ierr = PetscFree(idxb);CHKERRQ(ierr); 2361 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2362 PetscFunctionReturn(0); 2363 } 2364 2365 #undef __FUNCT__ 2366 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2367 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2368 { 2369 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2370 PetscInt n = A->rmap->n; 2371 PetscInt cstart = A->cmap->rstart; 2372 PetscInt *cmap = mat->garray; 2373 PetscInt *diagIdx, *offdiagIdx; 2374 Vec diagV, offdiagV; 2375 PetscScalar *a, *diagA, *offdiagA; 2376 PetscInt r; 2377 PetscErrorCode ierr; 2378 2379 PetscFunctionBegin; 2380 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2381 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2382 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2383 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2384 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2385 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2386 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2387 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2388 for (r = 0; r < n; ++r) { 2389 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2390 a[r] = diagA[r]; 2391 idx[r] = cstart + diagIdx[r]; 2392 } else { 2393 a[r] = offdiagA[r]; 2394 idx[r] = cmap[offdiagIdx[r]]; 2395 } 2396 } 2397 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2398 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2399 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2400 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2401 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2402 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2403 PetscFunctionReturn(0); 2404 } 2405 2406 #undef __FUNCT__ 2407 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2408 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2409 { 2410 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2411 PetscInt n = A->rmap->n; 2412 PetscInt cstart = A->cmap->rstart; 2413 PetscInt *cmap = mat->garray; 2414 PetscInt *diagIdx, *offdiagIdx; 2415 Vec diagV, offdiagV; 2416 PetscScalar *a, *diagA, *offdiagA; 2417 PetscInt r; 2418 PetscErrorCode ierr; 2419 2420 PetscFunctionBegin; 2421 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2422 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2423 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2424 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2425 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2426 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2427 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2428 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2429 for (r = 0; r < n; ++r) { 2430 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2431 a[r] = diagA[r]; 2432 idx[r] = cstart + diagIdx[r]; 2433 } else { 2434 a[r] = offdiagA[r]; 2435 idx[r] = cmap[offdiagIdx[r]]; 2436 } 2437 } 2438 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2439 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2440 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2441 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2442 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2443 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2444 PetscFunctionReturn(0); 2445 } 2446 2447 #undef __FUNCT__ 2448 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2449 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2450 { 2451 PetscErrorCode ierr; 
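  /* MatGetSubMatrix_MPIAIJ_All() returns an allocated array holding one
     sequential matrix with the complete nonzero structure (values are skipped
     because MAT_DO_NOT_GET_VALUES is passed); unwrap it and free the array
     wrapper below. */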
2452 Mat *dummy; 2453 2454 PetscFunctionBegin; 2455 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2456 *newmat = *dummy; 2457 ierr = PetscFree(dummy);CHKERRQ(ierr); 2458 PetscFunctionReturn(0); 2459 } 2460 2461 #undef __FUNCT__ 2462 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2463 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2464 { 2465 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2466 PetscErrorCode ierr; 2467 2468 PetscFunctionBegin; 2469 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2470 A->factorerrortype = a->A->factorerrortype; 2471 PetscFunctionReturn(0); 2472 } 2473 2474 #undef __FUNCT__ 2475 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2476 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2477 { 2478 PetscErrorCode ierr; 2479 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2480 2481 PetscFunctionBegin; 2482 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2483 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2484 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2485 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2486 PetscFunctionReturn(0); 2487 } 2488 2489 #undef __FUNCT__ 2490 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ" 2491 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2492 { 2493 PetscFunctionBegin; 2494 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2495 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2496 PetscFunctionReturn(0); 2497 } 2498 2499 #undef __FUNCT__ 2500 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap" 2501 /*@ 2502 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2503 2504 Collective on Mat 2505 2506 Input Parameters: 2507 + A - the matrix 2508 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2509 2510 Level: advanced 2511 2512 @*/ 2513 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2514 { 2515 PetscErrorCode ierr; 2516 2517 PetscFunctionBegin; 2518 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2519 PetscFunctionReturn(0); 2520 } 2521 2522 #undef __FUNCT__ 2523 #define __FUNCT__ "MatSetFromOptions_MPIAIJ" 2524 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2525 { 2526 PetscErrorCode ierr; 2527 PetscBool sc = PETSC_FALSE,flg; 2528 2529 PetscFunctionBegin; 2530 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2531 ierr = PetscObjectOptionsBegin((PetscObject)A); 2532 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2533 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2534 if (flg) { 2535 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2536 } 2537 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2538 PetscFunctionReturn(0); 2539 } 2540 2541 #undef __FUNCT__ 2542 #define __FUNCT__ "MatShift_MPIAIJ" 2543 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2544 { 2545 PetscErrorCode ierr; 2546 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2547 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2548 2549 PetscFunctionBegin; 2550 if (!Y->preallocated) { 2551 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2552 } else if 
(!aij->nz) { 2553 PetscInt nonew = aij->nonew; 2554 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2555 aij->nonew = nonew; 2556 } 2557 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ" 2563 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2564 { 2565 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2566 PetscErrorCode ierr; 2567 2568 PetscFunctionBegin; 2569 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2570 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2571 if (d) { 2572 PetscInt rstart; 2573 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2574 *d += rstart; 2575 2576 } 2577 PetscFunctionReturn(0); 2578 } 2579 2580 2581 /* -------------------------------------------------------------------*/ 2582 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2583 MatGetRow_MPIAIJ, 2584 MatRestoreRow_MPIAIJ, 2585 MatMult_MPIAIJ, 2586 /* 4*/ MatMultAdd_MPIAIJ, 2587 MatMultTranspose_MPIAIJ, 2588 MatMultTransposeAdd_MPIAIJ, 2589 0, 2590 0, 2591 0, 2592 /*10*/ 0, 2593 0, 2594 0, 2595 MatSOR_MPIAIJ, 2596 MatTranspose_MPIAIJ, 2597 /*15*/ MatGetInfo_MPIAIJ, 2598 MatEqual_MPIAIJ, 2599 MatGetDiagonal_MPIAIJ, 2600 MatDiagonalScale_MPIAIJ, 2601 MatNorm_MPIAIJ, 2602 /*20*/ MatAssemblyBegin_MPIAIJ, 2603 MatAssemblyEnd_MPIAIJ, 2604 MatSetOption_MPIAIJ, 2605 MatZeroEntries_MPIAIJ, 2606 /*24*/ MatZeroRows_MPIAIJ, 2607 0, 2608 0, 2609 0, 2610 0, 2611 /*29*/ MatSetUp_MPIAIJ, 2612 0, 2613 0, 2614 MatGetDiagonalBlock_MPIAIJ, 2615 0, 2616 /*34*/ MatDuplicate_MPIAIJ, 2617 0, 2618 0, 2619 0, 2620 0, 2621 /*39*/ MatAXPY_MPIAIJ, 2622 MatGetSubMatrices_MPIAIJ, 2623 MatIncreaseOverlap_MPIAIJ, 2624 MatGetValues_MPIAIJ, 2625 MatCopy_MPIAIJ, 2626 /*44*/ MatGetRowMax_MPIAIJ, 2627 MatScale_MPIAIJ, 2628 MatShift_MPIAIJ, 2629 MatDiagonalSet_MPIAIJ, 2630 MatZeroRowsColumns_MPIAIJ, 2631 /*49*/ MatSetRandom_MPIAIJ, 2632 0, 2633 0, 2634 0, 2635 0, 2636 /*54*/ MatFDColoringCreate_MPIXAIJ, 2637 0, 2638 MatSetUnfactored_MPIAIJ, 2639 MatPermute_MPIAIJ, 2640 0, 2641 /*59*/ MatGetSubMatrix_MPIAIJ, 2642 MatDestroy_MPIAIJ, 2643 MatView_MPIAIJ, 2644 0, 2645 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2646 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2647 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2648 0, 2649 0, 2650 0, 2651 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2652 MatGetRowMinAbs_MPIAIJ, 2653 0, 2654 0, 2655 0, 2656 0, 2657 /*75*/ MatFDColoringApply_AIJ, 2658 MatSetFromOptions_MPIAIJ, 2659 0, 2660 0, 2661 MatFindZeroDiagonals_MPIAIJ, 2662 /*80*/ 0, 2663 0, 2664 0, 2665 /*83*/ MatLoad_MPIAIJ, 2666 0, 2667 0, 2668 0, 2669 0, 2670 0, 2671 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2672 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2673 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2674 MatPtAP_MPIAIJ_MPIAIJ, 2675 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2676 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2677 0, 2678 0, 2679 0, 2680 0, 2681 /*99*/ 0, 2682 0, 2683 0, 2684 MatConjugate_MPIAIJ, 2685 0, 2686 /*104*/MatSetValuesRow_MPIAIJ, 2687 MatRealPart_MPIAIJ, 2688 MatImaginaryPart_MPIAIJ, 2689 0, 2690 0, 2691 /*109*/0, 2692 0, 2693 MatGetRowMin_MPIAIJ, 2694 0, 2695 MatMissingDiagonal_MPIAIJ, 2696 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2697 0, 2698 MatGetGhosts_MPIAIJ, 2699 0, 2700 0, 2701 /*119*/0, 2702 0, 2703 0, 2704 0, 2705 MatGetMultiProcBlock_MPIAIJ, 2706 /*124*/MatFindNonzeroRows_MPIAIJ, 2707 MatGetColumnNorms_MPIAIJ, 2708 MatInvertBlockDiagonal_MPIAIJ, 2709 0, 2710 
MatGetSubMatricesMPI_MPIAIJ, 2711 /*129*/0, 2712 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2713 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2714 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2715 0, 2716 /*134*/0, 2717 0, 2718 0, 2719 0, 2720 0, 2721 /*139*/MatSetBlockSizes_MPIAIJ, 2722 0, 2723 0, 2724 MatFDColoringSetUp_MPIXAIJ, 2725 MatFindOffBlockDiagonalEntries_MPIAIJ, 2726 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2727 }; 2728 2729 /* ----------------------------------------------------------------------------------------*/ 2730 2731 #undef __FUNCT__ 2732 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2733 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2734 { 2735 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2736 PetscErrorCode ierr; 2737 2738 PetscFunctionBegin; 2739 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2740 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2741 PetscFunctionReturn(0); 2742 } 2743 2744 #undef __FUNCT__ 2745 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2746 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2747 { 2748 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2749 PetscErrorCode ierr; 2750 2751 PetscFunctionBegin; 2752 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2753 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2754 PetscFunctionReturn(0); 2755 } 2756 2757 #undef __FUNCT__ 2758 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2759 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2760 { 2761 Mat_MPIAIJ *b; 2762 PetscErrorCode ierr; 2763 2764 PetscFunctionBegin; 2765 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2766 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2767 b = (Mat_MPIAIJ*)B->data; 2768 2769 #if defined(PETSC_USE_CTABLE) 2770 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2771 #else 2772 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2773 #endif 2774 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2775 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2776 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2777 2778 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2779 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2780 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2781 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2782 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2783 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2784 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2785 2786 if (!B->preallocated) { 2787 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2788 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2789 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2790 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2791 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2792 } 2793 2794 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2795 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2796 B->preallocated = PETSC_TRUE; 2797 B->was_assembled = PETSC_FALSE; 2798 B->assembled = PETSC_FALSE;; 2799 PetscFunctionReturn(0); 2800 } 2801 2802 #undef __FUNCT__ 2803 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2804 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2805 { 2806 Mat mat; 2807 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2808 PetscErrorCode ierr; 2809 2810 PetscFunctionBegin; 2811 *newmat = 0; 2812 ierr = 
MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2813 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2814 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2815 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2816 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2817 a = (Mat_MPIAIJ*)mat->data; 2818 2819 mat->factortype = matin->factortype; 2820 mat->assembled = PETSC_TRUE; 2821 mat->insertmode = NOT_SET_VALUES; 2822 mat->preallocated = PETSC_TRUE; 2823 2824 a->size = oldmat->size; 2825 a->rank = oldmat->rank; 2826 a->donotstash = oldmat->donotstash; 2827 a->roworiented = oldmat->roworiented; 2828 a->rowindices = 0; 2829 a->rowvalues = 0; 2830 a->getrowactive = PETSC_FALSE; 2831 2832 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2833 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2834 2835 if (oldmat->colmap) { 2836 #if defined(PETSC_USE_CTABLE) 2837 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2838 #else 2839 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2840 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2841 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2842 #endif 2843 } else a->colmap = 0; 2844 if (oldmat->garray) { 2845 PetscInt len; 2846 len = oldmat->B->cmap->n; 2847 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2848 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2849 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2850 } else a->garray = 0; 2851 2852 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2853 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2854 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2855 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2856 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2857 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2858 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2859 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2860 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2861 *newmat = mat; 2862 PetscFunctionReturn(0); 2863 } 2864 2865 2866 2867 #undef __FUNCT__ 2868 #define __FUNCT__ "MatLoad_MPIAIJ" 2869 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2870 { 2871 PetscScalar *vals,*svals; 2872 MPI_Comm comm; 2873 PetscErrorCode ierr; 2874 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2875 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2876 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2877 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2878 PetscInt cend,cstart,n,*rowners; 2879 int fd; 2880 PetscInt bs = newMat->rmap->bs; 2881 2882 PetscFunctionBegin; 2883 /* force binary viewer to load .info file if it has not yet done so */ 2884 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2885 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2886 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2887 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2888 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2889 if (!rank) { 2890 ierr = 
PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2891 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2892 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2893 } 2894 2895 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2896 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2897 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2898 if (bs < 0) bs = 1; 2899 2900 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2901 M = header[1]; N = header[2]; 2902 2903 /* If global sizes are set, check if they are consistent with that given in the file */ 2904 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2905 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2906 2907 /* determine ownership of all (block) rows */ 2908 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2909 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2910 else m = newMat->rmap->n; /* Set by user */ 2911 2912 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2913 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2914 2915 /* First process needs enough room for process with most rows */ 2916 if (!rank) { 2917 mmax = rowners[1]; 2918 for (i=2; i<=size; i++) { 2919 mmax = PetscMax(mmax, rowners[i]); 2920 } 2921 } else mmax = -1; /* unused, but compilers complain */ 2922 2923 rowners[0] = 0; 2924 for (i=2; i<=size; i++) { 2925 rowners[i] += rowners[i-1]; 2926 } 2927 rstart = rowners[rank]; 2928 rend = rowners[rank+1]; 2929 2930 /* distribute row lengths to all processors */ 2931 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2932 if (!rank) { 2933 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2934 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2935 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2936 for (j=0; j<m; j++) { 2937 procsnz[0] += ourlens[j]; 2938 } 2939 for (i=1; i<size; i++) { 2940 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2941 /* calculate the number of nonzeros on each processor */ 2942 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2943 procsnz[i] += rowlengths[j]; 2944 } 2945 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2946 } 2947 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2948 } else { 2949 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2950 } 2951 2952 if (!rank) { 2953 /* determine max buffer needed and allocate it */ 2954 maxnz = 0; 2955 for (i=0; i<size; i++) { 2956 maxnz = PetscMax(maxnz,procsnz[i]); 2957 } 2958 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2959 2960 /* read in my part of the matrix column indices */ 2961 nz = procsnz[0]; 2962 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2963 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2964 2965 /* read in every one elses and ship off */ 2966 for (i=1; i<size; i++) { 
2967 nz = procsnz[i]; 2968 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2969 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2970 } 2971 ierr = PetscFree(cols);CHKERRQ(ierr); 2972 } else { 2973 /* determine buffer space needed for message */ 2974 nz = 0; 2975 for (i=0; i<m; i++) { 2976 nz += ourlens[i]; 2977 } 2978 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2979 2980 /* receive message of column indices*/ 2981 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2982 } 2983 2984 /* determine column ownership if matrix is not square */ 2985 if (N != M) { 2986 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2987 else n = newMat->cmap->n; 2988 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2989 cstart = cend - n; 2990 } else { 2991 cstart = rstart; 2992 cend = rend; 2993 n = cend - cstart; 2994 } 2995 2996 /* loop over local rows, determining number of off diagonal entries */ 2997 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2998 jj = 0; 2999 for (i=0; i<m; i++) { 3000 for (j=0; j<ourlens[i]; j++) { 3001 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3002 jj++; 3003 } 3004 } 3005 3006 for (i=0; i<m; i++) { 3007 ourlens[i] -= offlens[i]; 3008 } 3009 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3010 3011 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3012 3013 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3014 3015 for (i=0; i<m; i++) { 3016 ourlens[i] += offlens[i]; 3017 } 3018 3019 if (!rank) { 3020 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3021 3022 /* read in my part of the matrix numerical values */ 3023 nz = procsnz[0]; 3024 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3025 3026 /* insert into matrix */ 3027 jj = rstart; 3028 smycols = mycols; 3029 svals = vals; 3030 for (i=0; i<m; i++) { 3031 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3032 smycols += ourlens[i]; 3033 svals += ourlens[i]; 3034 jj++; 3035 } 3036 3037 /* read in other processors and ship out */ 3038 for (i=1; i<size; i++) { 3039 nz = procsnz[i]; 3040 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3041 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3042 } 3043 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3044 } else { 3045 /* receive numeric values */ 3046 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3047 3048 /* receive message of values*/ 3049 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3050 3051 /* insert into matrix */ 3052 jj = rstart; 3053 smycols = mycols; 3054 svals = vals; 3055 for (i=0; i<m; i++) { 3056 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3057 smycols += ourlens[i]; 3058 svals += ourlens[i]; 3059 jj++; 3060 } 3061 } 3062 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3063 ierr = PetscFree(vals);CHKERRQ(ierr); 3064 ierr = PetscFree(mycols);CHKERRQ(ierr); 3065 ierr = PetscFree(rowners);CHKERRQ(ierr); 3066 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3067 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3068 PetscFunctionReturn(0); 3069 } 3070 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3073 /* TODO: Not scalable because of ISAllGather() unless getting all columns. 
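         Every process gathers the full set of requested columns, so memory grows
         with the global size of iscol. The all-columns case is detected below and
         replaced by an identity stride IS so the gather can be skipped.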
*/ 3074 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3075 { 3076 PetscErrorCode ierr; 3077 IS iscol_local; 3078 PetscInt csize; 3079 3080 PetscFunctionBegin; 3081 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3082 if (call == MAT_REUSE_MATRIX) { 3083 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3084 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3085 } else { 3086 /* check if we are grabbing all columns*/ 3087 PetscBool isstride; 3088 PetscMPIInt lisstride = 0,gisstride; 3089 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3090 if (isstride) { 3091 PetscInt start,len,mstart,mlen; 3092 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3093 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3094 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3095 if (mstart == start && mlen-mstart == len) lisstride = 1; 3096 } 3097 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3098 if (gisstride) { 3099 PetscInt N; 3100 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3101 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3102 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3103 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3104 } else { 3105 PetscInt cbs; 3106 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3107 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3108 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3109 } 3110 } 3111 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3112 if (call == MAT_INITIAL_MATRIX) { 3113 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3114 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3115 } 3116 PetscFunctionReturn(0); 3117 } 3118 3119 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3120 #undef __FUNCT__ 3121 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3122 /* 3123 Not great since it makes two copies of the submatrix, first an SeqAIJ 3124 in local and then by concatenating the local matrices the end result. 3125 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3126 3127 Note: This requires a sequential iscol with all indices. 
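(MatGetSubMatrix_MPIAIJ() above builds such an iscol_local, either with ISAllGather() or, when all columns are requested, with ISCreateStride().)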
3128 */ 3129 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3130 { 3131 PetscErrorCode ierr; 3132 PetscMPIInt rank,size; 3133 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3134 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3135 PetscBool allcolumns, colflag; 3136 Mat M,Mreuse; 3137 MatScalar *vwork,*aa; 3138 MPI_Comm comm; 3139 Mat_SeqAIJ *aij; 3140 3141 PetscFunctionBegin; 3142 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3143 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3144 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3145 3146 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3147 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3148 if (colflag && ncol == mat->cmap->N) { 3149 allcolumns = PETSC_TRUE; 3150 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3151 } else { 3152 allcolumns = PETSC_FALSE; 3153 } 3154 if (call == MAT_REUSE_MATRIX) { 3155 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3156 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3157 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3158 } else { 3159 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3160 } 3161 3162 /* 3163 m - number of local rows 3164 n - number of columns (same on all processors) 3165 rstart - first row in new global matrix generated 3166 */ 3167 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3168 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3169 if (call == MAT_INITIAL_MATRIX) { 3170 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3171 ii = aij->i; 3172 jj = aij->j; 3173 3174 /* 3175 Determine the number of non-zeros in the diagonal and off-diagonal 3176 portions of the matrix in order to do correct preallocation 3177 */ 3178 3179 /* first get start and end of "diagonal" columns */ 3180 if (csize == PETSC_DECIDE) { 3181 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3182 if (mglobal == n) { /* square matrix */ 3183 nlocal = m; 3184 } else { 3185 nlocal = n/size + ((n % size) > rank); 3186 } 3187 } else { 3188 nlocal = csize; 3189 } 3190 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3191 rstart = rend - nlocal; 3192 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3193 3194 /* next, compute all the lengths */ 3195 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3196 olens = dlens + m; 3197 for (i=0; i<m; i++) { 3198 jend = ii[i+1] - ii[i]; 3199 olen = 0; 3200 dlen = 0; 3201 for (j=0; j<jend; j++) { 3202 if (*jj < rstart || *jj >= rend) olen++; 3203 else dlen++; 3204 jj++; 3205 } 3206 olens[i] = olen; 3207 dlens[i] = dlen; 3208 } 3209 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3210 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3211 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3212 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3213 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3214 ierr = PetscFree(dlens);CHKERRQ(ierr); 3215 } else { 3216 PetscInt ml,nl; 3217 3218 M = *newmat; 3219 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3220 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same 
size/layout as request"); 3221 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3222 /* 3223 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3224 rather than the slower MatSetValues(). 3225 */ 3226 M->was_assembled = PETSC_TRUE; 3227 M->assembled = PETSC_FALSE; 3228 } 3229 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3230 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3231 ii = aij->i; 3232 jj = aij->j; 3233 aa = aij->a; 3234 for (i=0; i<m; i++) { 3235 row = rstart + i; 3236 nz = ii[i+1] - ii[i]; 3237 cwork = jj; jj += nz; 3238 vwork = aa; aa += nz; 3239 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3240 } 3241 3242 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3243 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3244 *newmat = M; 3245 3246 /* save submatrix used in processor for next request */ 3247 if (call == MAT_INITIAL_MATRIX) { 3248 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3249 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3250 } 3251 PetscFunctionReturn(0); 3252 } 3253 3254 #undef __FUNCT__ 3255 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3256 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3257 { 3258 PetscInt m,cstart, cend,j,nnz,i,d; 3259 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3260 const PetscInt *JJ; 3261 PetscScalar *values; 3262 PetscErrorCode ierr; 3263 3264 PetscFunctionBegin; 3265 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3266 3267 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3268 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3269 m = B->rmap->n; 3270 cstart = B->cmap->rstart; 3271 cend = B->cmap->rend; 3272 rstart = B->rmap->rstart; 3273 3274 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3275 3276 #if defined(PETSC_USE_DEBUG) 3277 for (i=0; i<m; i++) { 3278 nnz = Ii[i+1]- Ii[i]; 3279 JJ = J + Ii[i]; 3280 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3281 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3282 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3283 } 3284 #endif 3285 3286 for (i=0; i<m; i++) { 3287 nnz = Ii[i+1]- Ii[i]; 3288 JJ = J + Ii[i]; 3289 nnz_max = PetscMax(nnz_max,nnz); 3290 d = 0; 3291 for (j=0; j<nnz; j++) { 3292 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3293 } 3294 d_nnz[i] = d; 3295 o_nnz[i] = nnz - d; 3296 } 3297 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3298 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3299 3300 if (v) values = (PetscScalar*)v; 3301 else { 3302 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3303 } 3304 3305 for (i=0; i<m; i++) { 3306 ii = i + rstart; 3307 nnz = Ii[i+1]- Ii[i]; 3308 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3309 } 3310 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3311 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3312 3313 if (!v) { 3314 ierr = PetscFree(values);CHKERRQ(ierr); 3315 } 3316 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3317 PetscFunctionReturn(0); 3318 } 3319 3320 #undef __FUNCT__ 3321 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3322 /*@ 3323 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3324 (the default parallel PETSc format). 3325 3326 Collective on MPI_Comm 3327 3328 Input Parameters: 3329 + B - the matrix 3330 . i - the indices into j for the start of each local row (starts with zero) 3331 . j - the column indices for each local row (starts with zero) 3332 - v - optional values in the matrix 3333 3334 Level: developer 3335 3336 Notes: 3337 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3338 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3339 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3340 3341 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3342 3343 The format which is used for the sparse matrix input, is equivalent to a 3344 row-major ordering.. i.e for the following matrix, the input data expected is 3345 as shown 3346 3347 $ 1 0 0 3348 $ 2 0 3 P0 3349 $ ------- 3350 $ 4 5 6 P1 3351 $ 3352 $ Process0 [P0]: rows_owned=[0,1] 3353 $ i = {0,1,3} [size = nrow+1 = 2+1] 3354 $ j = {0,0,2} [size = 3] 3355 $ v = {1,2,3} [size = 3] 3356 $ 3357 $ Process1 [P1]: rows_owned=[2] 3358 $ i = {0,3} [size = nrow+1 = 1+1] 3359 $ j = {0,1,2} [size = 3] 3360 $ v = {4,5,6} [size = 3] 3361 3362 .keywords: matrix, aij, compressed row, sparse, parallel 3363 3364 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3365 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3366 @*/ 3367 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3368 { 3369 PetscErrorCode ierr; 3370 3371 PetscFunctionBegin; 3372 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3373 PetscFunctionReturn(0); 3374 } 3375 3376 #undef __FUNCT__ 3377 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3378 /*@C 3379 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3380 (the default parallel PETSc format). For good matrix assembly performance 3381 the user should preallocate the matrix storage by setting the parameters 3382 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3383 performance can be increased by more than a factor of 50. 3384 3385 Collective on MPI_Comm 3386 3387 Input Parameters: 3388 + B - the matrix 3389 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3390 (same value is used for all local rows) 3391 . d_nnz - array containing the number of nonzeros in the various rows of the 3392 DIAGONAL portion of the local submatrix (possibly different for each row) 3393 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3394 The size of this array is equal to the number of local rows, i.e 'm'. 
3395 For matrices that will be factored, you must leave room for (and set) 3396 the diagonal entry even if it is zero. 3397 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3398 submatrix (same value is used for all local rows). 3399 - o_nnz - array containing the number of nonzeros in the various rows of the 3400 OFF-DIAGONAL portion of the local submatrix (possibly different for 3401 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3402 structure. The size of this array is equal to the number 3403 of local rows, i.e 'm'. 3404 3405 If the *_nnz parameter is given then the *_nz parameter is ignored 3406 3407 The AIJ format (also called the Yale sparse matrix format or 3408 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3409 storage. The stored row and column indices begin with zero. 3410 See Users-Manual: ch_mat for details. 3411 3412 The parallel matrix is partitioned such that the first m0 rows belong to 3413 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3414 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3415 3416 The DIAGONAL portion of the local submatrix of a processor can be defined 3417 as the submatrix which is obtained by extraction the part corresponding to 3418 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3419 first row that belongs to the processor, r2 is the last row belonging to 3420 the this processor, and c1-c2 is range of indices of the local part of a 3421 vector suitable for applying the matrix to. This is an mxn matrix. In the 3422 common case of a square matrix, the row and column ranges are the same and 3423 the DIAGONAL part is also square. The remaining portion of the local 3424 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3425 3426 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3427 3428 You can call MatGetInfo() to get information on how effective the preallocation was; 3429 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3430 You can also run with the option -info and look for messages with the string 3431 malloc in them to see if additional memory allocation was needed. 3432 3433 Example usage: 3434 3435 Consider the following 8x8 matrix with 34 non-zero values, that is 3436 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3437 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3438 as follows: 3439 3440 .vb 3441 1 2 0 | 0 3 0 | 0 4 3442 Proc0 0 5 6 | 7 0 0 | 8 0 3443 9 0 10 | 11 0 0 | 12 0 3444 ------------------------------------- 3445 13 0 14 | 15 16 17 | 0 0 3446 Proc1 0 18 0 | 19 20 21 | 0 0 3447 0 0 0 | 22 23 0 | 24 0 3448 ------------------------------------- 3449 Proc2 25 26 27 | 0 0 28 | 29 0 3450 30 0 0 | 31 32 33 | 0 34 3451 .ve 3452 3453 This can be represented as a collection of submatrices as: 3454 3455 .vb 3456 A B C 3457 D E F 3458 G H I 3459 .ve 3460 3461 Where the submatrices A,B,C are owned by proc0, D,E,F are 3462 owned by proc1, G,H,I are owned by proc2. 3463 3464 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3465 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3466 The 'M','N' parameters are 8,8, and have the same values on all procs. 3467 3468 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3469 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3470 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3471 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3472 part as SeqAIJ matrices. e.g., proc1 will store [E] as one SeqAIJ 3473 matrix and [DF] as another SeqAIJ matrix. 3474 3475 When d_nz, o_nz parameters are specified, d_nz storage elements are 3476 allocated for every row of the local diagonal submatrix, and o_nz 3477 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3478 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3479 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3480 In this case, the values of d_nz,o_nz are: 3481 .vb 3482 proc0 : d_nz = 2, o_nz = 2 3483 proc1 : d_nz = 3, o_nz = 2 3484 proc2 : d_nz = 1, o_nz = 4 3485 .ve 3486 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3487 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3488 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3489 34 values. 3490 3491 When d_nnz, o_nnz parameters are specified, the storage is specified 3492 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3493 In the above case the values for d_nnz,o_nnz are: 3494 .vb 3495 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3496 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3497 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3498 .ve 3499 Here the space allocated is the sum of all the above values, i.e., 34, and 3500 hence preallocation is perfect. 3501 3502 Level: intermediate 3503 3504 .keywords: matrix, aij, compressed row, sparse, parallel 3505 3506 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3507 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 3508 @*/ 3509 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3510 { 3511 PetscErrorCode ierr; 3512 3513 PetscFunctionBegin; 3514 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3515 PetscValidType(B,1); 3516 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3517 PetscFunctionReturn(0); 3518 } 3519 3520 #undef __FUNCT__ 3521 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3522 /*@ 3523 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows 3524 in standard CSR format. 3525 3526 Collective on MPI_Comm 3527 3528 Input Parameters: 3529 + comm - MPI communicator 3530 . m - number of local rows (Cannot be PETSC_DECIDE) 3531 . n - This value should be the same as the local size used in creating the 3532 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3533 calculated if N is given) For square matrices n is almost always m. 3534 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3535 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3536 . i - row indices 3537 . j - column indices 3538 - a - matrix values 3539 3540 Output Parameter: 3541 . mat - the matrix 3542 3543 Level: intermediate 3544 3545 Notes: 3546 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3547 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3548 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3549 3550 The i and j indices are 0 based, and the entries of i are offsets into the local j array.
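   For instance, for the 3x3 example laid out below (an illustrative call only; A is a Mat to be
   created, and every process makes the collective call with its own local arrays), process 0 could use
.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve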
3551 3552 The format which is used for the sparse matrix input, is equivalent to a 3553 row-major ordering.. i.e for the following matrix, the input data expected is 3554 as shown 3555 3556 $ 1 0 0 3557 $ 2 0 3 P0 3558 $ ------- 3559 $ 4 5 6 P1 3560 $ 3561 $ Process0 [P0]: rows_owned=[0,1] 3562 $ i = {0,1,3} [size = nrow+1 = 2+1] 3563 $ j = {0,0,2} [size = 3] 3564 $ v = {1,2,3} [size = 3] 3565 $ 3566 $ Process1 [P1]: rows_owned=[2] 3567 $ i = {0,3} [size = nrow+1 = 1+1] 3568 $ j = {0,1,2} [size = 3] 3569 $ v = {4,5,6} [size = 3] 3570 3571 .keywords: matrix, aij, compressed row, sparse, parallel 3572 3573 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3574 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3575 @*/ 3576 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3577 { 3578 PetscErrorCode ierr; 3579 3580 PetscFunctionBegin; 3581 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3582 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3583 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3584 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3585 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3586 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3587 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3588 PetscFunctionReturn(0); 3589 } 3590 3591 #undef __FUNCT__ 3592 #define __FUNCT__ "MatCreateAIJ" 3593 /*@C 3594 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3595 (the default parallel PETSc format). For good matrix assembly performance 3596 the user should preallocate the matrix storage by setting the parameters 3597 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3598 performance can be increased by more than a factor of 50. 3599 3600 Collective on MPI_Comm 3601 3602 Input Parameters: 3603 + comm - MPI communicator 3604 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3605 This value should be the same as the local size used in creating the 3606 y vector for the matrix-vector product y = Ax. 3607 . n - This value should be the same as the local size used in creating the 3608 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3609 calculated if N is given) For square matrices n is almost always m. 3610 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3611 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3612 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3613 (same value is used for all local rows) 3614 . d_nnz - array containing the number of nonzeros in the various rows of the 3615 DIAGONAL portion of the local submatrix (possibly different for each row) 3616 or NULL, if d_nz is used to specify the nonzero structure. 3617 The size of this array is equal to the number of local rows, i.e 'm'. 3618 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3619 submatrix (same value is used for all local rows). 3620 - o_nnz - array containing the number of nonzeros in the various rows of the 3621 OFF-DIAGONAL portion of the local submatrix (possibly different for 3622 each row) or NULL, if o_nz is used to specify the nonzero 3623 structure. 
The size of this array is equal to the number 3624 of local rows, i.e., 'm'. 3625 3626 Output Parameter: 3627 . A - the matrix 3628 3629 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3630 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3631 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 3632 3633 Notes: 3634 If the *_nnz parameter is given then the *_nz parameter is ignored. 3635 3636 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 3637 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3638 storage requirements for this matrix. 3639 3640 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3641 processor then it must be used on all processors that share the object for 3642 that argument. 3643 3644 The user MUST specify either the local or global matrix dimensions 3645 (possibly both). 3646 3647 The parallel matrix is partitioned across processors such that the 3648 first m0 rows belong to process 0, the next m1 rows belong to 3649 process 1, the next m2 rows belong to process 2, etc., where 3650 m0,m1,m2,... are the input parameter 'm', i.e., each processor stores 3651 values corresponding to an [m x N] submatrix. 3652 3653 The columns are logically partitioned with the n0 columns belonging 3654 to the 0th partition, the next n1 columns belonging to the next 3655 partition, etc., where n0,n1,n2,... are the input parameter 'n'. 3656 3657 The DIAGONAL portion of the local submatrix on any given processor 3658 is the submatrix corresponding to the rows and columns m,n 3659 owned by the given processor, i.e., the diagonal matrix on 3660 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 3661 etc. The remaining portion of the local submatrix [m x (N-n)] 3662 constitutes the OFF-DIAGONAL portion. The example below better 3663 illustrates this concept. 3664 3665 For a square global matrix we define each processor's diagonal portion 3666 to be its local rows and the corresponding columns (a square submatrix); 3667 each processor's off-diagonal portion encompasses the remainder of the 3668 local matrix (a rectangular submatrix). 3669 3670 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 3671 3672 When calling this routine with a single process communicator, a matrix of 3673 type SEQAIJ is returned. If a matrix of type MATMPIAIJ is desired for this 3674 type of communicator, use the construction mechanism: 3675 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3676 3677 By default, this format uses inodes (identical nodes) when possible. 3678 We search for consecutive rows with the same nonzero structure, thereby 3679 reusing matrix information to achieve increased efficiency. 3680 3681 Options Database Keys: 3682 + -mat_no_inode - Do not use inodes 3683 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3684 - -mat_aij_oneindex - Internally use indexing starting at 1 3685 rather than 0. Note that when calling MatSetValues(), 3686 the user still MUST index entries starting at 0! 3687 3688 3689 Example usage: 3690 3691 Consider the following 8x8 matrix with 34 nonzero values that is 3692 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 3693 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 3694 as follows: 3695 3696 .vb 3697 1 2 0 | 0 3 0 | 0 4 3698 Proc0 0 5 6 | 7 0 0 | 8 0 3699 9 0 10 | 11 0 0 | 12 0 3700 ------------------------------------- 3701 13 0 14 | 15 16 17 | 0 0 3702 Proc1 0 18 0 | 19 20 21 | 0 0 3703 0 0 0 | 22 23 0 | 24 0 3704 ------------------------------------- 3705 Proc2 25 26 27 | 0 0 28 | 29 0 3706 30 0 0 | 31 32 33 | 0 34 3707 .ve 3708 3709 This can be represented as a collection of submatrices as: 3710 3711 .vb 3712 A B C 3713 D E F 3714 G H I 3715 .ve 3716 3717 Where the submatrices A,B,C are owned by proc0, D,E,F are 3718 owned by proc1, G,H,I are owned by proc2. 3719 3720 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3721 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3722 The 'M','N' parameters are 8,8, and have the same values on all procs. 3723 3724 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3725 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3726 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3727 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3728 part as SeqAIJ matrices. e.g., proc1 will store [E] as one SeqAIJ 3729 matrix and [DF] as another SeqAIJ matrix. 3730 3731 When d_nz, o_nz parameters are specified, d_nz storage elements are 3732 allocated for every row of the local diagonal submatrix, and o_nz 3733 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3734 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3735 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3736 In this case, the values of d_nz,o_nz are: 3737 .vb 3738 proc0 : d_nz = 2, o_nz = 2 3739 proc1 : d_nz = 3, o_nz = 2 3740 proc2 : d_nz = 1, o_nz = 4 3741 .ve 3742 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3743 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3744 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3745 34 values. 3746 3747 When d_nnz, o_nnz parameters are specified, the storage is specified 3748 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3749 In the above case the values for d_nnz,o_nnz are: 3750 .vb 3751 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3752 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3753 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3754 .ve 3755 Here the space allocated is the sum of all the above values, i.e., 34, and 3756 hence preallocation is perfect.
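   A matching call on proc0 for this example (an illustrative call only; A is a Mat to be created,
   and each process passes its own local sizes and arrays) would be
.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   The d_nz and o_nz arguments (here 0) are ignored because the d_nnz and o_nnz arrays are given.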
3757 3758 Level: intermediate 3759 3760 .keywords: matrix, aij, compressed row, sparse, parallel 3761 3762 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3763 MATMPIAIJ, MatCreateMPIAIJWithArrays() 3764 @*/ 3765 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3766 { 3767 PetscErrorCode ierr; 3768 PetscMPIInt size; 3769 3770 PetscFunctionBegin; 3771 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3772 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3773 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3774 if (size > 1) { 3775 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3776 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3777 } else { 3778 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3779 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3780 } 3781 PetscFunctionReturn(0); 3782 } 3783 3784 #undef __FUNCT__ 3785 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3786 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3787 { 3788 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3789 PetscBool flg; 3790 PetscErrorCode ierr; 3791 3792 PetscFunctionBegin; 3793 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 3794 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 3795 if (Ad) *Ad = a->A; 3796 if (Ao) *Ao = a->B; 3797 if (colmap) *colmap = a->garray; 3798 PetscFunctionReturn(0); 3799 } 3800 3801 #undef __FUNCT__ 3802 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3803 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3804 { 3805 PetscErrorCode ierr; 3806 PetscInt m,N,i,rstart,nnz,Ii; 3807 PetscInt *indx; 3808 PetscScalar *values; 3809 3810 PetscFunctionBegin; 3811 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3812 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3813 PetscInt *dnz,*onz,sum,bs,cbs; 3814 3815 if (n == PETSC_DECIDE) { 3816 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3817 } 3818 /* Check sum(n) = N */ 3819 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3820 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3821 3822 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3823 rstart -= m; 3824 3825 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3826 for (i=0; i<m; i++) { 3827 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3828 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3829 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3830 } 3831 3832 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3833 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3834 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3835 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3836 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3837 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3838 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3839 } 3840 3841 /* numeric phase */ 3842 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3843 for (i=0; i<m; i++) { 3844 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3845 Ii = i + rstart; 3846 
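/* copy row i of the sequential matrix into global row Ii of the parallel matrix */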
ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3847 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3848 } 3849 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3850 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3851 PetscFunctionReturn(0); 3852 } 3853 3854 #undef __FUNCT__ 3855 #define __FUNCT__ "MatFileSplit" 3856 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3857 { 3858 PetscErrorCode ierr; 3859 PetscMPIInt rank; 3860 PetscInt m,N,i,rstart,nnz; 3861 size_t len; 3862 const PetscInt *indx; 3863 PetscViewer out; 3864 char *name; 3865 Mat B; 3866 const PetscScalar *values; 3867 3868 PetscFunctionBegin; 3869 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3870 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3871 /* Should this be the type of the diagonal block of A? */ 3872 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3873 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3874 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3875 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3876 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3877 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3878 for (i=0; i<m; i++) { 3879 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3880 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3881 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3882 } 3883 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3884 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3885 3886 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3887 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3888 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3889 sprintf(name,"%s.%d",outfile,rank); 3890 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3891 ierr = PetscFree(name);CHKERRQ(ierr); 3892 ierr = MatView(B,out);CHKERRQ(ierr); 3893 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3894 ierr = MatDestroy(&B);CHKERRQ(ierr); 3895 PetscFunctionReturn(0); 3896 } 3897 3898 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3899 #undef __FUNCT__ 3900 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3901 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3902 { 3903 PetscErrorCode ierr; 3904 Mat_Merge_SeqsToMPI *merge; 3905 PetscContainer container; 3906 3907 PetscFunctionBegin; 3908 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3909 if (container) { 3910 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3911 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3912 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3913 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3914 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3915 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3916 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3917 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3918 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3919 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3920 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3921 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3922 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3923 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3924 ierr = PetscFree(merge);CHKERRQ(ierr); 3925 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3926 } 3927 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3928 PetscFunctionReturn(0); 3929 } 3930 3931 
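/*
   Illustrative usage of the merge machinery below (a sketch, not library code; Aseq, C, m, n are
   caller-chosen placeholders). The symbolic/numeric pair is normally driven through
   MatCreateMPIAIJSumSeqAIJ():

      ierr = MatCreateMPIAIJSumSeqAIJ(comm,Aseq,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ... change the numerical values of Aseq, keeping its nonzero pattern ...
      ierr = MatCreateMPIAIJSumSeqAIJ(comm,Aseq,m,n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);

   The first call creates the symbolic structure and attaches the Mat_Merge_SeqsToMPI container
   that MatDestroy_MPIAIJ_SeqsToMPI() above releases; the second call repeats only the numeric phase.
*/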
#include <../src/mat/utils/freespace.h> 3932 #include <petscbt.h> 3933 3934 #undef __FUNCT__ 3935 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3936 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3937 { 3938 PetscErrorCode ierr; 3939 MPI_Comm comm; 3940 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3941 PetscMPIInt size,rank,taga,*len_s; 3942 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3943 PetscInt proc,m; 3944 PetscInt **buf_ri,**buf_rj; 3945 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3946 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3947 MPI_Request *s_waits,*r_waits; 3948 MPI_Status *status; 3949 MatScalar *aa=a->a; 3950 MatScalar **abuf_r,*ba_i; 3951 Mat_Merge_SeqsToMPI *merge; 3952 PetscContainer container; 3953 3954 PetscFunctionBegin; 3955 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3956 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3957 3958 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3959 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3960 3961 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3962 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3963 3964 bi = merge->bi; 3965 bj = merge->bj; 3966 buf_ri = merge->buf_ri; 3967 buf_rj = merge->buf_rj; 3968 3969 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 3970 owners = merge->rowmap->range; 3971 len_s = merge->len_s; 3972 3973 /* send and recv matrix values */ 3974 /*-----------------------------*/ 3975 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3976 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3977 3978 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 3979 for (proc=0,k=0; proc<size; proc++) { 3980 if (!len_s[proc]) continue; 3981 i = owners[proc]; 3982 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3983 k++; 3984 } 3985 3986 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3987 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3988 ierr = PetscFree(status);CHKERRQ(ierr); 3989 3990 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3991 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3992 3993 /* insert mat values of mpimat */ 3994 /*----------------------------*/ 3995 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 3996 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 3997 3998 for (k=0; k<merge->nrecv; k++) { 3999 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4000 nrows = *(buf_ri_k[k]); 4001 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4002 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4003 } 4004 4005 /* set values of ba */ 4006 m = merge->rowmap->n; 4007 for (i=0; i<m; i++) { 4008 arow = owners[rank] + i; 4009 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4010 bnzi = bi[i+1] - bi[i]; 4011 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4012 4013 /* add local non-zero vals of this proc's seqmat into ba */ 4014 anzi = ai[arow+1] - ai[arow]; 4015 aj = a->j + ai[arow]; 4016 aa = a->a + ai[arow]; 4017 nextaj = 0; 4018 for (j=0; nextaj<anzi; j++) { 4019 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4020 ba_i[j] += aa[nextaj++]; 4021 } 4022 } 4023 4024 /* add received 
vals into ba */ 4025 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4026 /* i-th row */ 4027 if (i == *nextrow[k]) { 4028 anzi = *(nextai[k]+1) - *nextai[k]; 4029 aj = buf_rj[k] + *(nextai[k]); 4030 aa = abuf_r[k] + *(nextai[k]); 4031 nextaj = 0; 4032 for (j=0; nextaj<anzi; j++) { 4033 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4034 ba_i[j] += aa[nextaj++]; 4035 } 4036 } 4037 nextrow[k]++; nextai[k]++; 4038 } 4039 } 4040 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4041 } 4042 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4043 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4044 4045 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4046 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4047 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4048 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4049 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4050 PetscFunctionReturn(0); 4051 } 4052 4053 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4054 4055 #undef __FUNCT__ 4056 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4057 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4058 { 4059 PetscErrorCode ierr; 4060 Mat B_mpi; 4061 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4062 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4063 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4064 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4065 PetscInt len,proc,*dnz,*onz,bs,cbs; 4066 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4067 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4068 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4069 MPI_Status *status; 4070 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4071 PetscBT lnkbt; 4072 Mat_Merge_SeqsToMPI *merge; 4073 PetscContainer container; 4074 4075 PetscFunctionBegin; 4076 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4077 4078 /* make sure it is a PETSc comm */ 4079 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4080 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4081 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4082 4083 ierr = PetscNew(&merge);CHKERRQ(ierr); 4084 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4085 4086 /* determine row ownership */ 4087 /*---------------------------------------------------------*/ 4088 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4089 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4090 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4091 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4092 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4093 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4094 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4095 4096 m = merge->rowmap->n; 4097 owners = merge->rowmap->range; 4098 4099 /* determine the number of messages to send, their lengths */ 4100 /*---------------------------------------------------------*/ 4101 len_s = merge->len_s; 4102 4103 len = 0; /* length of buf_si[] */ 4104 merge->nsend = 0; 4105 for (proc=0; proc<size; proc++) { 4106 len_si[proc] = 0; 4107 if (proc == rank) { 4108 len_s[proc] = 0; 4109 } else { 4110 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4111 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4112 } 4113 if (len_s[proc]) { 4114 merge->nsend++; 4115 nrows = 0; 4116 for 
(i=owners[proc]; i<owners[proc+1]; i++) { 4117 if (ai[i+1] > ai[i]) nrows++; 4118 } 4119 len_si[proc] = 2*(nrows+1); 4120 len += len_si[proc]; 4121 } 4122 } 4123 4124 /* determine the number and length of messages to receive for ij-structure */ 4125 /*-------------------------------------------------------------------------*/ 4126 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4127 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4128 4129 /* post the Irecv of j-structure */ 4130 /*-------------------------------*/ 4131 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4132 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4133 4134 /* post the Isend of j-structure */ 4135 /*--------------------------------*/ 4136 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4137 4138 for (proc=0, k=0; proc<size; proc++) { 4139 if (!len_s[proc]) continue; 4140 i = owners[proc]; 4141 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4142 k++; 4143 } 4144 4145 /* receives and sends of j-structure are complete */ 4146 /*------------------------------------------------*/ 4147 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4148 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4149 4150 /* send and recv i-structure */ 4151 /*---------------------------*/ 4152 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4153 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4154 4155 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4156 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4157 for (proc=0,k=0; proc<size; proc++) { 4158 if (!len_s[proc]) continue; 4159 /* form outgoing message for i-structure: 4160 buf_si[0]: nrows to be sent 4161 [1:nrows]: row index (global) 4162 [nrows+1:2*nrows+1]: i-structure index 4163 */ 4164 /*-------------------------------------------*/ 4165 nrows = len_si[proc]/2 - 1; 4166 buf_si_i = buf_si + nrows+1; 4167 buf_si[0] = nrows; 4168 buf_si_i[0] = 0; 4169 nrows = 0; 4170 for (i=owners[proc]; i<owners[proc+1]; i++) { 4171 anzi = ai[i+1] - ai[i]; 4172 if (anzi) { 4173 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4174 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4175 nrows++; 4176 } 4177 } 4178 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4179 k++; 4180 buf_si += len_si[proc]; 4181 } 4182 4183 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4184 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4185 4186 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4187 for (i=0; i<merge->nrecv; i++) { 4188 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4189 } 4190 4191 ierr = PetscFree(len_si);CHKERRQ(ierr); 4192 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4193 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4194 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4195 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4196 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4197 ierr = PetscFree(status);CHKERRQ(ierr); 4198 4199 /* compute a local seq matrix in each processor */ 4200 
/*----------------------------------------------*/ 4201 /* allocate bi array and free space for accumulating nonzero column info */ 4202 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4203 bi[0] = 0; 4204 4205 /* create and initialize a linked list */ 4206 nlnk = N+1; 4207 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4208 4209 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4210 len = ai[owners[rank+1]] - ai[owners[rank]]; 4211 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4212 4213 current_space = free_space; 4214 4215 /* determine symbolic info for each local row */ 4216 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4217 4218 for (k=0; k<merge->nrecv; k++) { 4219 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4220 nrows = *buf_ri_k[k]; 4221 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4222 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4223 } 4224 4225 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4226 len = 0; 4227 for (i=0; i<m; i++) { 4228 bnzi = 0; 4229 /* add local non-zero cols of this proc's seqmat into lnk */ 4230 arow = owners[rank] + i; 4231 anzi = ai[arow+1] - ai[arow]; 4232 aj = a->j + ai[arow]; 4233 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4234 bnzi += nlnk; 4235 /* add received col data into lnk */ 4236 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4237 if (i == *nextrow[k]) { /* i-th row */ 4238 anzi = *(nextai[k]+1) - *nextai[k]; 4239 aj = buf_rj[k] + *nextai[k]; 4240 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4241 bnzi += nlnk; 4242 nextrow[k]++; nextai[k]++; 4243 } 4244 } 4245 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4246 4247 /* if free space is not available, make more free space */ 4248 if (current_space->local_remaining<bnzi) { 4249 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4250 nspacedouble++; 4251 } 4252 /* copy data into free space, then initialize lnk */ 4253 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4254 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4255 4256 current_space->array += bnzi; 4257 current_space->local_used += bnzi; 4258 current_space->local_remaining -= bnzi; 4259 4260 bi[i+1] = bi[i] + bnzi; 4261 } 4262 4263 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4264 4265 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4266 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4267 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4268 4269 /* create symbolic parallel matrix B_mpi */ 4270 /*---------------------------------------*/ 4271 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4272 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4273 if (n==PETSC_DECIDE) { 4274 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4275 } else { 4276 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4277 } 4278 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4279 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4280 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4281 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4282 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4283 4284 /* B_mpi is not ready 
for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4285 B_mpi->assembled = PETSC_FALSE; 4286 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4287 merge->bi = bi; 4288 merge->bj = bj; 4289 merge->buf_ri = buf_ri; 4290 merge->buf_rj = buf_rj; 4291 merge->coi = NULL; 4292 merge->coj = NULL; 4293 merge->owners_co = NULL; 4294 4295 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4296 4297 /* attach the supporting struct to B_mpi for reuse */ 4298 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4299 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4300 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4301 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4302 *mpimat = B_mpi; 4303 4304 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4305 PetscFunctionReturn(0); 4306 } 4307 4308 #undef __FUNCT__ 4309 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4310 /*@C 4311 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4312 matrices from each processor 4313 4314 Collective on MPI_Comm 4315 4316 Input Parameters: 4317 + comm - the communicators the parallel matrix will live on 4318 . seqmat - the input sequential matrices 4319 . m - number of local rows (or PETSC_DECIDE) 4320 . n - number of local columns (or PETSC_DECIDE) 4321 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4322 4323 Output Parameter: 4324 . mpimat - the parallel matrix generated 4325 4326 Level: advanced 4327 4328 Notes: 4329 The dimensions of the sequential matrix in each processor MUST be the same. 4330 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4331 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4332 @*/ 4333 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4334 { 4335 PetscErrorCode ierr; 4336 PetscMPIInt size; 4337 4338 PetscFunctionBegin; 4339 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4340 if (size == 1) { 4341 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4342 if (scall == MAT_INITIAL_MATRIX) { 4343 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4344 } else { 4345 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4346 } 4347 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4348 PetscFunctionReturn(0); 4349 } 4350 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4351 if (scall == MAT_INITIAL_MATRIX) { 4352 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4353 } 4354 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4355 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4356 PetscFunctionReturn(0); 4357 } 4358 4359 #undef __FUNCT__ 4360 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4361 /*@ 4362 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4363 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4364 with MatGetSize() 4365 4366 Not Collective 4367 4368 Input Parameters: 4369 + A - the matrix 4370 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4371 4372 Output Parameter: 4373 . 
A_loc - the local sequential matrix generated 4374 4375 Level: developer 4376 4377 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4378 4379 @*/ 4380 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4381 { 4382 PetscErrorCode ierr; 4383 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4384 Mat_SeqAIJ *mat,*a,*b; 4385 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4386 MatScalar *aa,*ba,*cam; 4387 PetscScalar *ca; 4388 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4389 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4390 PetscBool match; 4391 MPI_Comm comm; 4392 PetscMPIInt size; 4393 4394 PetscFunctionBegin; 4395 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4396 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4397 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4398 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4399 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4400 4401 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4402 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4403 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4404 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4405 aa = a->a; ba = b->a; 4406 if (scall == MAT_INITIAL_MATRIX) { 4407 if (size == 1) { 4408 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4409 PetscFunctionReturn(0); 4410 } 4411 4412 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4413 ci[0] = 0; 4414 for (i=0; i<am; i++) { 4415 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4416 } 4417 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4418 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4419 k = 0; 4420 for (i=0; i<am; i++) { 4421 ncols_o = bi[i+1] - bi[i]; 4422 ncols_d = ai[i+1] - ai[i]; 4423 /* off-diagonal portion of A */ 4424 for (jo=0; jo<ncols_o; jo++) { 4425 col = cmap[*bj]; 4426 if (col >= cstart) break; 4427 cj[k] = col; bj++; 4428 ca[k++] = *ba++; 4429 } 4430 /* diagonal portion of A */ 4431 for (j=0; j<ncols_d; j++) { 4432 cj[k] = cstart + *aj++; 4433 ca[k++] = *aa++; 4434 } 4435 /* off-diagonal portion of A */ 4436 for (j=jo; j<ncols_o; j++) { 4437 cj[k] = cmap[*bj++]; 4438 ca[k++] = *ba++; 4439 } 4440 } 4441 /* put together the new matrix */ 4442 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4443 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4444 /* Since these are PETSc arrays, change flags to free them as necessary. 
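Setting free_a and free_ij marks the i, j, and a arrays as owned by the matrix, so MatDestroy() releases them.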
*/ 4445 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4446 mat->free_a = PETSC_TRUE; 4447 mat->free_ij = PETSC_TRUE; 4448 mat->nonew = 0; 4449 } else if (scall == MAT_REUSE_MATRIX) { 4450 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4451 ci = mat->i; cj = mat->j; cam = mat->a; 4452 for (i=0; i<am; i++) { 4453 /* off-diagonal portion of A */ 4454 ncols_o = bi[i+1] - bi[i]; 4455 for (jo=0; jo<ncols_o; jo++) { 4456 col = cmap[*bj]; 4457 if (col >= cstart) break; 4458 *cam++ = *ba++; bj++; 4459 } 4460 /* diagonal portion of A */ 4461 ncols_d = ai[i+1] - ai[i]; 4462 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4463 /* off-diagonal portion of A */ 4464 for (j=jo; j<ncols_o; j++) { 4465 *cam++ = *ba++; bj++; 4466 } 4467 } 4468 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4469 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4470 PetscFunctionReturn(0); 4471 } 4472 4473 #undef __FUNCT__ 4474 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4475 /*@C 4476 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4477 4478 Not Collective 4479 4480 Input Parameters: 4481 + A - the matrix 4482 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4483 - row, col - index sets of rows and columns to extract (or NULL) 4484 4485 Output Parameter: 4486 . A_loc - the local sequential matrix generated 4487 4488 Level: developer 4489 4490 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4491 4492 @*/ 4493 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4494 { 4495 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4496 PetscErrorCode ierr; 4497 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4498 IS isrowa,iscola; 4499 Mat *aloc; 4500 PetscBool match; 4501 4502 PetscFunctionBegin; 4503 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4504 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4505 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4506 if (!row) { 4507 start = A->rmap->rstart; end = A->rmap->rend; 4508 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4509 } else { 4510 isrowa = *row; 4511 } 4512 if (!col) { 4513 start = A->cmap->rstart; 4514 cmap = a->garray; 4515 nzA = a->A->cmap->n; 4516 nzB = a->B->cmap->n; 4517 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4518 ncols = 0; 4519 for (i=0; i<nzB; i++) { 4520 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4521 else break; 4522 } 4523 imark = i; 4524 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4525 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4526 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4527 } else { 4528 iscola = *col; 4529 } 4530 if (scall != MAT_INITIAL_MATRIX) { 4531 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4532 aloc[0] = *A_loc; 4533 } 4534 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4535 *A_loc = aloc[0]; 4536 ierr = PetscFree(aloc);CHKERRQ(ierr); 4537 if (!row) { 4538 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4539 } 4540 if (!col) { 4541 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4542 } 4543 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4544 PetscFunctionReturn(0); 4545 } 4546 4547 #undef __FUNCT__ 4548 #define __FUNCT__ "MatGetBrowsOfAcols" 4549 /*@C 4550 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to 
nonzero columns of local A 4551 4552 Collective on Mat 4553 4554 Input Parameters: 4555 + A,B - the matrices in mpiaij format 4556 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4557 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4558 4559 Output Parameter: 4560 + rowb, colb - index sets of rows and columns of B to extract 4561 - B_seq - the sequential matrix generated 4562 4563 Level: developer 4564 4565 @*/ 4566 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4567 { 4568 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4569 PetscErrorCode ierr; 4570 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4571 IS isrowb,iscolb; 4572 Mat *bseq=NULL; 4573 4574 PetscFunctionBegin; 4575 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4576 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4577 } 4578 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4579 4580 if (scall == MAT_INITIAL_MATRIX) { 4581 start = A->cmap->rstart; 4582 cmap = a->garray; 4583 nzA = a->A->cmap->n; 4584 nzB = a->B->cmap->n; 4585 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4586 ncols = 0; 4587 for (i=0; i<nzB; i++) { /* row < local row index */ 4588 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4589 else break; 4590 } 4591 imark = i; 4592 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4593 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4594 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4595 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4596 } else { 4597 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4598 isrowb = *rowb; iscolb = *colb; 4599 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 4600 bseq[0] = *B_seq; 4601 } 4602 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4603 *B_seq = bseq[0]; 4604 ierr = PetscFree(bseq);CHKERRQ(ierr); 4605 if (!rowb) { 4606 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4607 } else { 4608 *rowb = isrowb; 4609 } 4610 if (!colb) { 4611 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4612 } else { 4613 *colb = iscolb; 4614 } 4615 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4616 PetscFunctionReturn(0); 4617 } 4618 4619 #undef __FUNCT__ 4620 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4621 /* 4622 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4623 of the OFF-DIAGONAL portion of local A 4624 4625 Collective on Mat 4626 4627 Input Parameters: 4628 + A,B - the matrices in mpiaij format 4629 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4630 4631 Output Parameter: 4632 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4633 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4634 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4635 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4636 4637 Level: developer 4638 4639 */ 4640 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4641 { 4642 VecScatter_MPI_General *gen_to,*gen_from; 4643 PetscErrorCode ierr; 4644 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4645 Mat_SeqAIJ *b_oth; 4646 VecScatter ctx =a->Mvctx; 4647 MPI_Comm comm; 4648 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4649 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4650 PetscScalar *rvalues,*svalues; 4651 MatScalar *b_otha,*bufa,*bufA; 4652 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4653 MPI_Request *rwaits = NULL,*swaits = NULL; 4654 MPI_Status *sstatus,rstatus; 4655 PetscMPIInt jj,size; 4656 PetscInt *cols,sbs,rbs; 4657 PetscScalar *vals; 4658 4659 PetscFunctionBegin; 4660 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4661 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4662 4663 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4664 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4665 } 4666 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4667 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4668 4669 gen_to = (VecScatter_MPI_General*)ctx->todata; 4670 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4671 rvalues = gen_from->values; /* holds the length of receiving row */ 4672 svalues = gen_to->values; /* holds the length of sending row */ 4673 nrecvs = gen_from->n; 4674 nsends = gen_to->n; 4675 4676 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4677 srow = gen_to->indices; /* local row index to be sent */ 4678 sstarts = gen_to->starts; 4679 sprocs = gen_to->procs; 4680 sstatus = gen_to->sstatus; 4681 sbs = gen_to->bs; 4682 rstarts = gen_from->starts; 4683 rprocs = gen_from->procs; 4684 rbs = gen_from->bs; 4685 4686 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4687 if (scall == MAT_INITIAL_MATRIX) { 4688 /* i-array */ 4689 /*---------*/ 4690 /* post receives */ 4691 for (i=0; i<nrecvs; i++) { 4692 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4693 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4694 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4695 } 4696 4697 /* pack the outgoing message */ 4698 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4699 4700 sstartsj[0] = 0; 4701 rstartsj[0] = 0; 4702 len = 0; /* total length of j or a array to be sent */ 4703 k = 0; 4704 for (i=0; i<nsends; i++) { 4705 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4706 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4707 for (j=0; j<nrows; j++) { 4708 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4709 for (l=0; l<sbs; l++) { 4710 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4711 4712 rowlen[j*sbs+l] = ncols; 4713 4714 len += ncols; 4715 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4716 } 4717 k++; 4718 } 4719 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4720 4721 sstartsj[i+1] = len; /* starting point of 
(i+1)-th outgoing msg in bufj and bufa */ 4722 } 4723 /* recvs and sends of i-array are completed */ 4724 i = nrecvs; 4725 while (i--) { 4726 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4727 } 4728 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4729 4730 /* allocate buffers for sending j and a arrays */ 4731 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 4732 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 4733 4734 /* create i-array of B_oth */ 4735 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 4736 4737 b_othi[0] = 0; 4738 len = 0; /* total length of j or a array to be received */ 4739 k = 0; 4740 for (i=0; i<nrecvs; i++) { 4741 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4742 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4743 for (j=0; j<nrows; j++) { 4744 b_othi[k+1] = b_othi[k] + rowlen[j]; 4745 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 4746 k++; 4747 } 4748 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4749 } 4750 4751 /* allocate space for j and a arrays of B_oth */ 4752 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 4753 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 4754 4755 /* j-array */ 4756 /*---------*/ 4757 /* post receives of j-array */ 4758 for (i=0; i<nrecvs; i++) { 4759 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4760 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4761 } 4762 4763 /* pack the outgoing message j-array */ 4764 k = 0; 4765 for (i=0; i<nsends; i++) { 4766 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4767 bufJ = bufj+sstartsj[i]; 4768 for (j=0; j<nrows; j++) { 4769 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4770 for (ll=0; ll<sbs; ll++) { 4771 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4772 for (l=0; l<ncols; l++) { 4773 *bufJ++ = cols[l]; 4774 } 4775 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4776 } 4777 } 4778 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4779 } 4780 4781 /* recvs and sends of j-array are completed */ 4782 i = nrecvs; 4783 while (i--) { 4784 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4785 } 4786 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4787 } else if (scall == MAT_REUSE_MATRIX) { 4788 sstartsj = *startsj_s; 4789 rstartsj = *startsj_r; 4790 bufa = *bufa_ptr; 4791 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4792 b_otha = b_oth->a; 4793 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4794 4795 /* a-array */ 4796 /*---------*/ 4797 /* post receives of a-array */ 4798 for (i=0; i<nrecvs; i++) { 4799 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4800 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4801 } 4802 4803 /* pack the outgoing message a-array */ 4804 k = 0; 4805 for (i=0; i<nsends; i++) { 4806 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4807 bufA = bufa+sstartsj[i]; 4808 for (j=0; j<nrows; j++) { 4809 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4810 for (ll=0; ll<sbs; ll++) { 4811 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4812 for (l=0; l<ncols; l++) { 4813 *bufA++ = vals[l]; 4814 } 4815 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4816 } 4817 } 4818 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4819 } 4820 /* recvs and sends of a-array are completed */ 4821 i = nrecvs; 4822 while (i--) { 4823 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4824 } 4825 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4826 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4827 4828 if (scall == MAT_INITIAL_MATRIX) { 4829 /* put together the new matrix */ 4830 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4831 4832 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4833 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4834 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4835 b_oth->free_a = PETSC_TRUE; 4836 b_oth->free_ij = PETSC_TRUE; 4837 b_oth->nonew = 0; 4838 4839 ierr = PetscFree(bufj);CHKERRQ(ierr); 4840 if (!startsj_s || !bufa_ptr) { 4841 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4842 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4843 } else { 4844 *startsj_s = sstartsj; 4845 *startsj_r = rstartsj; 4846 *bufa_ptr = bufa; 4847 } 4848 } 4849 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4850 PetscFunctionReturn(0); 4851 } 4852 4853 #undef __FUNCT__ 4854 #define __FUNCT__ "MatGetCommunicationStructs" 4855 /*@C 4856 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4857 4858 Not Collective 4859 4860 Input Parameters: 4861 . A - The matrix in mpiaij format 4862 4863 Output Parameter: 4864 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4865 . 
colmap - A map from global column index to local index into lvec 4866 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4867 4868 Level: developer 4869 4870 @*/ 4871 #if defined(PETSC_USE_CTABLE) 4872 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4873 #else 4874 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4875 #endif 4876 { 4877 Mat_MPIAIJ *a; 4878 4879 PetscFunctionBegin; 4880 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4881 PetscValidPointer(lvec, 2); 4882 PetscValidPointer(colmap, 3); 4883 PetscValidPointer(multScatter, 4); 4884 a = (Mat_MPIAIJ*) A->data; 4885 if (lvec) *lvec = a->lvec; 4886 if (colmap) *colmap = a->colmap; 4887 if (multScatter) *multScatter = a->Mvctx; 4888 PetscFunctionReturn(0); 4889 } 4890 4891 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4892 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4893 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4894 #if defined(PETSC_HAVE_ELEMENTAL) 4895 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4896 #endif 4897 #if defined(PETSC_HAVE_HYPRE) 4898 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 4899 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 4900 #endif 4901 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*); 4902 4903 #undef __FUNCT__ 4904 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4905 /* 4906 Computes (B'*A')' since computing B*A directly is untenable 4907 4908 n p p 4909 ( ) ( ) ( ) 4910 m ( A ) * n ( B ) = m ( C ) 4911 ( ) ( ) ( ) 4912 4913 */ 4914 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4915 { 4916 PetscErrorCode ierr; 4917 Mat At,Bt,Ct; 4918 4919 PetscFunctionBegin; 4920 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4921 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4922 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4923 ierr = MatDestroy(&At);CHKERRQ(ierr); 4924 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4925 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4926 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4927 PetscFunctionReturn(0); 4928 } 4929 4930 #undef __FUNCT__ 4931 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4932 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4933 { 4934 PetscErrorCode ierr; 4935 PetscInt m=A->rmap->n,n=B->cmap->n; 4936 Mat Cmat; 4937 4938 PetscFunctionBegin; 4939 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4940 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 4941 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4942 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 4943 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4944 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 4945 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4946 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4947 4948 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 4949 4950 *C = Cmat; 4951 PetscFunctionReturn(0); 4952 } 4953 4954 /* 
----------------------------------------------------------------*/ 4955 #undef __FUNCT__ 4956 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4957 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4958 { 4959 PetscErrorCode ierr; 4960 4961 PetscFunctionBegin; 4962 if (scall == MAT_INITIAL_MATRIX) { 4963 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4964 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4965 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4966 } 4967 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4968 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4969 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4970 PetscFunctionReturn(0); 4971 } 4972 4973 /*MC 4974 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4975 4976 Options Database Keys: 4977 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4978 4979 Level: beginner 4980 4981 .seealso: MatCreateAIJ() 4982 M*/ 4983 4984 #undef __FUNCT__ 4985 #define __FUNCT__ "MatCreate_MPIAIJ" 4986 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 4987 { 4988 Mat_MPIAIJ *b; 4989 PetscErrorCode ierr; 4990 PetscMPIInt size; 4991 4992 PetscFunctionBegin; 4993 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 4994 4995 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 4996 B->data = (void*)b; 4997 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4998 B->assembled = PETSC_FALSE; 4999 B->insertmode = NOT_SET_VALUES; 5000 b->size = size; 5001 5002 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5003 5004 /* build cache for off array entries formed */ 5005 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5006 5007 b->donotstash = PETSC_FALSE; 5008 b->colmap = 0; 5009 b->garray = 0; 5010 b->roworiented = PETSC_TRUE; 5011 5012 /* stuff used for matrix vector multiply */ 5013 b->lvec = NULL; 5014 b->Mvctx = NULL; 5015 5016 /* stuff for MatGetRow() */ 5017 b->rowindices = 0; 5018 b->rowvalues = 0; 5019 b->getrowactive = PETSC_FALSE; 5020 5021 /* flexible pointer used in CUSP/CUSPARSE classes */ 5022 b->spptr = NULL; 5023 5024 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5025 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5026 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5027 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5028 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5029 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5030 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5031 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5032 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5033 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5034 #if defined(PETSC_HAVE_ELEMENTAL) 5035 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5036 #endif 5037 #if defined(PETSC_HAVE_HYPRE) 5038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5039 #endif 5040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr); 5041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5044 #if defined(PETSC_HAVE_HYPRE) 5045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5046 #endif 5047 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5048 PetscFunctionReturn(0); 5049 } 5050 5051 #undef __FUNCT__ 5052 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5053 /*@C 5054 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5055 and "off-diagonal" part of the matrix in CSR format. 5056 5057 Collective on MPI_Comm 5058 5059 Input Parameters: 5060 + comm - MPI communicator 5061 . m - number of local rows (Cannot be PETSC_DECIDE) 5062 . n - This value should be the same as the local size used in creating the 5063 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5064 calculated if N is given) For square matrices n is almost always m. 5065 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5066 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5067 . i - row indices for "diagonal" portion of matrix 5068 . j - column indices 5069 . a - matrix values 5070 . oi - row indices for "off-diagonal" portion of matrix 5071 . oj - column indices 5072 - oa - matrix values 5073 5074 Output Parameter: 5075 . mat - the matrix 5076 5077 Level: advanced 5078 5079 Notes: 5080 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5081 must free the arrays once the matrix has been destroyed and not before. 5082 5083 The i and j indices are 0 based 5084 5085 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5086 5087 This sets local rows and cannot be used to set off-processor values. 5088 5089 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5090 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5091 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5092 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5093 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5094 communication if it is known that only local entries will be set. 
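   A minimal illustrative sketch (the variable names and values below are hypothetical, not a prescribed usage): on exactly two processes, the 2x2 matrix [2 1; 1 2], with one row and one column owned by each process, could be assembled as
.vb
      PetscErrorCode ierr;
      PetscMPIInt    rank;
      PetscInt       i[2]  = {0,1}, j[1]  = {0};   /* diagonal block: local column indices      */
      PetscInt       oi[2] = {0,1}, oj[1];         /* off-diagonal block: global column indices */
      PetscScalar    a[1]  = {2.0}, oa[1] = {1.0};
      Mat            A;

      ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      oj[0] = 1 - rank;   /* the global column owned by the other process */
      ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      /* the arrays are not copied, so A must be destroyed before i, j, a, oi, oj, oa go out of scope */
.ve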
5095 5096 .keywords: matrix, aij, compressed row, sparse, parallel 5097 5098 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5099 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5100 @*/ 5101 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5102 { 5103 PetscErrorCode ierr; 5104 Mat_MPIAIJ *maij; 5105 5106 PetscFunctionBegin; 5107 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5108 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5109 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5110 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5111 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5112 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5113 maij = (Mat_MPIAIJ*) (*mat)->data; 5114 5115 (*mat)->preallocated = PETSC_TRUE; 5116 5117 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5118 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5119 5120 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5121 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5122 5123 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5124 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5125 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5126 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5127 5128 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5129 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5130 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5131 PetscFunctionReturn(0); 5132 } 5133 5134 /* 5135 Special version for direct calls from Fortran 5136 */ 5137 #include <petsc/private/fortranimpl.h> 5138 5139 /* Change these macros so can be used in void function */ 5140 #undef CHKERRQ 5141 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5142 #undef SETERRQ2 5143 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5144 #undef SETERRQ3 5145 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5146 #undef SETERRQ 5147 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5148 5149 #undef __FUNCT__ 5150 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5151 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5152 #define __FUNCT__ "MATSETVALUESMPIAIJ" 5153 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5154 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5155 #define __FUNCT__ "matsetvaluesmpiaij" 5156 #else 5157 #define __FUNCT__ "matsetvaluesmpiaij_" 5158 #endif 5159 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5160 { 5161 Mat mat = *mmat; 5162 PetscInt m = *mm, n = *mn; 5163 InsertMode addv = *maddv; 5164 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5165 PetscScalar value; 5166 PetscErrorCode ierr; 5167 5168 MatCheckPreallocated(mat,1); 5169 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5170 5171 #if defined(PETSC_USE_DEBUG) 5172 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 
5173 #endif 5174 { 5175 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5176 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5177 PetscBool roworiented = aij->roworiented; 5178 5179 /* Some Variables required in the macro */ 5180 Mat A = aij->A; 5181 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5182 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5183 MatScalar *aa = a->a; 5184 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5185 Mat B = aij->B; 5186 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5187 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5188 MatScalar *ba = b->a; 5189 5190 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5191 PetscInt nonew = a->nonew; 5192 MatScalar *ap1,*ap2; 5193 5194 PetscFunctionBegin; 5195 for (i=0; i<m; i++) { 5196 if (im[i] < 0) continue; 5197 #if defined(PETSC_USE_DEBUG) 5198 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5199 #endif 5200 if (im[i] >= rstart && im[i] < rend) { 5201 row = im[i] - rstart; 5202 lastcol1 = -1; 5203 rp1 = aj + ai[row]; 5204 ap1 = aa + ai[row]; 5205 rmax1 = aimax[row]; 5206 nrow1 = ailen[row]; 5207 low1 = 0; 5208 high1 = nrow1; 5209 lastcol2 = -1; 5210 rp2 = bj + bi[row]; 5211 ap2 = ba + bi[row]; 5212 rmax2 = bimax[row]; 5213 nrow2 = bilen[row]; 5214 low2 = 0; 5215 high2 = nrow2; 5216 5217 for (j=0; j<n; j++) { 5218 if (roworiented) value = v[i*n+j]; 5219 else value = v[i+j*m]; 5220 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5221 if (in[j] >= cstart && in[j] < cend) { 5222 col = in[j] - cstart; 5223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5224 } else if (in[j] < 0) continue; 5225 #if defined(PETSC_USE_DEBUG) 5226 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5227 #endif 5228 else { 5229 if (mat->was_assembled) { 5230 if (!aij->colmap) { 5231 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5232 } 5233 #if defined(PETSC_USE_CTABLE) 5234 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5235 col--; 5236 #else 5237 col = aij->colmap[in[j]] - 1; 5238 #endif 5239 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5240 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5241 col = in[j]; 5242 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5243 B = aij->B; 5244 b = (Mat_SeqAIJ*)B->data; 5245 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5246 rp2 = bj + bi[row]; 5247 ap2 = ba + bi[row]; 5248 rmax2 = bimax[row]; 5249 nrow2 = bilen[row]; 5250 low2 = 0; 5251 high2 = nrow2; 5252 bm = aij->B->rmap->n; 5253 ba = b->a; 5254 } 5255 } else col = in[j]; 5256 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5257 } 5258 } 5259 } else if (!aij->donotstash) { 5260 if (roworiented) { 5261 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5262 } else { 5263 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5264 } 5265 } 5266 } 5267 } 5268 PetscFunctionReturnVoid(); 5269 } 5270 5271
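/*
   Usage sketch for MatMPIAIJGetLocalMat() (illustrative only; the calling code and variable
   names are hypothetical, not taken from the library): the first call with MAT_INITIAL_MATRIX
   builds the local sequential matrix; a later call with MAT_REUSE_MATRIX only refreshes the
   numerical values, assuming the nonzero pattern of A is unchanged.

      Mat A_loc;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      .... A_loc is used; later the values (but not the pattern) of A change ....
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/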