1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc/private/vecimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 8 /*MC 9 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 10 11 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 12 and MATMPIAIJ otherwise. As a result, for single process communicators, 13 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 14 for communicators controlling multiple processes. It is recommended that you call both of 15 the above preallocation routines for simplicity. 16 17 Options Database Keys: 18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 19 20 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 21 enough exist. 22 23 Level: beginner 24 25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 26 M*/ 27 28 /*MC 29 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 30 31 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 32 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 33 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 34 for communicators controlling multiple processes. It is recommended that you call both of 35 the above preallocation routines for simplicity. 36 37 Options Database Keys: 38 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 39 40 Level: beginner 41 42 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 43 M*/ 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 48 { 49 PetscErrorCode ierr; 50 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 51 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 52 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 53 const PetscInt *ia,*ib; 54 const MatScalar *aa,*bb; 55 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 56 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 57 58 PetscFunctionBegin; 59 *keptrows = 0; 60 ia = a->i; 61 ib = b->i; 62 for (i=0; i<m; i++) { 63 na = ia[i+1] - ia[i]; 64 nb = ib[i+1] - ib[i]; 65 if (!na && !nb) { 66 cnt++; 67 goto ok1; 68 } 69 aa = a->a + ia[i]; 70 for (j=0; j<na; j++) { 71 if (aa[j] != 0.0) goto ok1; 72 } 73 bb = b->a + ib[i]; 74 for (j=0; j <nb; j++) { 75 if (bb[j] != 0.0) goto ok1; 76 } 77 cnt++; 78 ok1:; 79 } 80 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 81 if (!n0rows) PetscFunctionReturn(0); 82 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 83 cnt = 0; 84 for (i=0; i<m; i++) { 85 na = ia[i+1] - ia[i]; 86 nb = ib[i+1] - ib[i]; 87 if (!na && !nb) continue; 88 aa = a->a + ia[i]; 89 for (j=0; j<na;j++) { 90 if (aa[j] != 0.0) { 91 rows[cnt++] = rstart + i; 92 goto ok2; 93 } 94 } 95 bb = b->a + ib[i]; 96 for (j=0; j<nb; j++) { 97 if (bb[j] != 0.0) { 98 rows[cnt++] = rstart + i; 99 goto ok2; 100 } 101 } 102 ok2:; 103 } 104 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ" 110 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 111 { 112 PetscErrorCode ierr; 113 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 114 115 PetscFunctionBegin; 116 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 117 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 118 } else { 119 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 120 } 121 PetscFunctionReturn(0); 122 } 123 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 128 { 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 130 PetscErrorCode ierr; 131 PetscInt i,rstart,nrows,*rows; 132 133 PetscFunctionBegin; 134 *zrows = NULL; 135 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 136 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 137 for (i=0; i<nrows; i++) rows[i] += rstart; 138 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 145 { 146 PetscErrorCode ierr; 147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 148 PetscInt i,n,*garray = aij->garray; 149 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 150 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 151 PetscReal *work; 152 153 PetscFunctionBegin; 154 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 155 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 156 if (type == NORM_2) { 157 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 158 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 159 } 160 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 161 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 162 } 163 } else if (type == NORM_1) { 164 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 165 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 166 } 167 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 168 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 169 } 170 } else if (type == NORM_INFINITY) { 171 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 172 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 173 } 174 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 175 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 176 } 177 178 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 179 if (type == NORM_INFINITY) { 180 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 181 } else { 182 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 183 } 184 ierr = PetscFree(work);CHKERRQ(ierr); 185 if (type == NORM_2) { 186 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 187 } 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ" 193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 194 { 195 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 196 IS sis,gis; 197 PetscErrorCode ierr; 198 const PetscInt *isis,*igis; 199 PetscInt n,*iis,nsis,ngis,rstart,i; 200 201 PetscFunctionBegin; 202 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 203 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 204 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 205 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 206 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 207 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 208 209 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 210 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 211 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 212 n = ngis + nsis; 213 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 214 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 215 for (i=0; i<n; i++) iis[i] += rstart; 216 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 217 218 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 219 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 220 ierr = ISDestroy(&sis);CHKERRQ(ierr); 221 ierr = ISDestroy(&gis);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatDistribute_MPIAIJ" 227 /* 228 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 229 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 230 231 Only for square matrices 232 233 Used by a preconditioner, hence PETSC_EXTERN 234 */ 235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 236 { 237 PetscMPIInt rank,size; 238 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 239 PetscErrorCode ierr; 240 Mat mat; 241 Mat_SeqAIJ *gmata; 242 PetscMPIInt tag; 243 MPI_Status status; 244 PetscBool aij; 245 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 246 247 PetscFunctionBegin; 248 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 249 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 250 if (!rank) { 251 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 252 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 253 } 254 if (reuse == MAT_INITIAL_MATRIX) { 255 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 256 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 257 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 258 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 259 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 260 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 261 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 262 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 263 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 264 265 rowners[0] = 0; 266 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 267 rstart = rowners[rank]; 268 rend = rowners[rank+1]; 269 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 270 if (!rank) { 271 gmata = (Mat_SeqAIJ*) gmat->data; 272 /* send row lengths to all processors */ 273 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 274 for (i=1; i<size; i++) { 275 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 276 } 277 /* determine number diagonal and off-diagonal counts */ 278 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 279 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 280 jj = 0; 281 for (i=0; i<m; i++) { 282 for (j=0; j<dlens[i]; j++) { 283 if (gmata->j[jj] < rstart) ld[i]++; 284 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 285 jj++; 286 } 287 } 288 /* send column indices to other processes */ 289 for (i=1; i<size; i++) { 290 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 291 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 292 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 } 294 295 /* send numerical values to other processes */ 296 for (i=1; i<size; i++) { 297 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 298 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 299 } 300 gmataa = gmata->a; 301 gmataj = gmata->j; 302 303 } else { 304 /* receive row lengths */ 305 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 306 /* receive column indices */ 307 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 308 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 309 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* determine number diagonal and off-diagonal counts */ 311 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 312 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 313 jj = 0; 314 for (i=0; i<m; i++) { 315 for (j=0; j<dlens[i]; j++) { 316 if (gmataj[jj] < rstart) ld[i]++; 317 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 318 jj++; 319 } 320 } 321 /* receive numerical values */ 322 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* set preallocation */ 326 for (i=0; i<m; i++) { 327 dlens[i] -= olens[i]; 328 } 329 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 330 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 331 332 for (i=0; i<m; i++) { 333 dlens[i] += olens[i]; 334 } 335 cnt = 0; 336 for (i=0; i<m; i++) { 337 row = rstart + i; 338 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 339 cnt += dlens[i]; 340 } 341 if (rank) { 342 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 343 } 344 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 345 ierr = PetscFree(rowners);CHKERRQ(ierr); 346 347 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 348 349 *inmat = mat; 350 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 351 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 352 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 353 mat = *inmat; 354 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 355 if (!rank) { 356 /* send numerical values to other processes */ 357 gmata = (Mat_SeqAIJ*) gmat->data; 358 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 359 gmataa = gmata->a; 360 for (i=1; i<size; i++) { 361 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 362 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 363 } 364 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 365 } else { 366 /* receive numerical values from process 0*/ 367 nz = Ad->nz + Ao->nz; 368 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 369 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 370 } 371 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 372 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 373 ad = Ad->a; 374 ao = Ao->a; 375 if (mat->rmap->n) { 376 i = 0; 377 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 378 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 379 } 380 for (i=1; i<mat->rmap->n; i++) { 381 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 i--; 385 if (mat->rmap->n) { 386 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 387 } 388 if (rank) { 389 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 390 } 391 } 392 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 393 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 /* 398 Local utility routine that creates a mapping from the global column 399 number to the local number in the off-diagonal part of the local 400 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 401 a slightly higher hash table cost; without it it is not scalable (each processor 402 has an order N integer array but is fast to acess. 403 */ 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 407 { 408 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 409 PetscErrorCode ierr; 410 PetscInt n = aij->B->cmap->n,i; 411 412 PetscFunctionBegin; 413 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 414 #if defined(PETSC_USE_CTABLE) 415 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 416 for (i=0; i<n; i++) { 417 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 418 } 419 #else 420 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 421 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 422 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 423 #endif 424 PetscFunctionReturn(0); 425 } 426 427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 428 { \ 429 if (col <= lastcol1) low1 = 0; \ 430 else high1 = nrow1; \ 431 lastcol1 = col;\ 432 while (high1-low1 > 5) { \ 433 t = (low1+high1)/2; \ 434 if (rp1[t] > col) high1 = t; \ 435 else low1 = t; \ 436 } \ 437 for (_i=low1; _i<high1; _i++) { \ 438 if (rp1[_i] > col) break; \ 439 if (rp1[_i] == col) { \ 440 if (addv == ADD_VALUES) ap1[_i] += value; \ 441 else ap1[_i] = value; \ 442 goto a_noinsert; \ 443 } \ 444 } \ 445 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 446 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 447 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 448 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 449 N = nrow1++ - 1; a->nz++; high1++; \ 450 /* shift up all the later entries in this row */ \ 451 for (ii=N; ii>=_i; ii--) { \ 452 rp1[ii+1] = rp1[ii]; \ 453 ap1[ii+1] = ap1[ii]; \ 454 } \ 455 rp1[_i] = col; \ 456 ap1[_i] = value; \ 457 A->nonzerostate++;\ 458 a_noinsert: ; \ 459 ailen[row] = nrow1; \ 460 } 461 462 463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 464 { \ 465 if (col <= lastcol2) low2 = 0; \ 466 else high2 = nrow2; \ 467 lastcol2 = col; \ 468 while (high2-low2 > 5) { \ 469 t = (low2+high2)/2; \ 470 if (rp2[t] > col) high2 = t; \ 471 else low2 = t; \ 472 } \ 473 for (_i=low2; _i<high2; _i++) { \ 474 if (rp2[_i] > col) break; \ 475 if (rp2[_i] == col) { \ 476 if (addv == ADD_VALUES) ap2[_i] += value; \ 477 else ap2[_i] = value; \ 478 goto b_noinsert; \ 479 } \ 480 } \ 481 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 482 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 484 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 485 N = nrow2++ - 1; b->nz++; high2++; \ 486 /* shift up all the later entries in this row */ \ 487 for (ii=N; ii>=_i; ii--) { \ 488 rp2[ii+1] = rp2[ii]; \ 489 ap2[ii+1] = ap2[ii]; \ 490 } \ 491 rp2[_i] = col; \ 492 ap2[_i] = value; \ 493 B->nonzerostate++; \ 494 b_noinsert: ; \ 495 bilen[row] = nrow2; \ 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 501 { 502 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 503 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 504 PetscErrorCode ierr; 505 PetscInt l,*garray = mat->garray,diag; 506 507 PetscFunctionBegin; 508 /* code only works for square matrices A */ 509 510 /* find size of row to the left of the diagonal part */ 511 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 512 row = row - diag; 513 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 514 if (garray[b->j[b->i[row]+l]] > diag) break; 515 } 516 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 517 518 /* diagonal part */ 519 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 520 521 /* right of diagonal part */ 522 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatSetValues_MPIAIJ" 528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 529 { 530 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 531 PetscScalar value; 532 PetscErrorCode ierr; 533 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 540 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 541 MatScalar *aa = a->a; 542 PetscBool ignorezeroentries = a->ignorezeroentries; 543 Mat B = aij->B; 544 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 545 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 546 MatScalar *ba = b->a; 547 548 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 549 PetscInt nonew; 550 MatScalar *ap1,*ap2; 551 552 PetscFunctionBegin; 553 for (i=0; i<m; i++) { 554 if (im[i] < 0) continue; 555 #if defined(PETSC_USE_DEBUG) 556 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 557 #endif 558 if (im[i] >= rstart && im[i] < rend) { 559 row = im[i] - rstart; 560 lastcol1 = -1; 561 rp1 = aj + ai[row]; 562 ap1 = aa + ai[row]; 563 rmax1 = aimax[row]; 564 nrow1 = ailen[row]; 565 low1 = 0; 566 high1 = nrow1; 567 lastcol2 = -1; 568 rp2 = bj + bi[row]; 569 ap2 = ba + bi[row]; 570 rmax2 = bimax[row]; 571 nrow2 = bilen[row]; 572 low2 = 0; 573 high2 = nrow2; 574 575 for (j=0; j<n; j++) { 576 if (roworiented) value = v[i*n+j]; 577 else value = v[i+j*m]; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 579 if (in[j] >= cstart && in[j] < cend) { 580 col = in[j] - cstart; 581 nonew = a->nonew; 582 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 583 } else if (in[j] < 0) continue; 584 #if defined(PETSC_USE_DEBUG) 585 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 586 #endif 587 else { 588 if (mat->was_assembled) { 589 if (!aij->colmap) { 590 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 591 } 592 #if defined(PETSC_USE_CTABLE) 593 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 594 col--; 595 #else 596 col = aij->colmap[in[j]] - 1; 597 #endif 598 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 599 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 600 col = in[j]; 601 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 602 B = aij->B; 603 b = (Mat_SeqAIJ*)B->data; 604 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 605 rp2 = bj + bi[row]; 606 ap2 = ba + bi[row]; 607 rmax2 = bimax[row]; 608 nrow2 = bilen[row]; 609 low2 = 0; 610 high2 = nrow2; 611 bm = aij->B->rmap->n; 612 ba = b->a; 613 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } else col = in[j]; 615 nonew = b->nonew; 616 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 617 } 618 } 619 } else { 620 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 621 if (!aij->donotstash) { 622 mat->assembled = PETSC_FALSE; 623 if (roworiented) { 624 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 625 } else { 626 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 627 } 628 } 629 } 630 } 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatGetValues_MPIAIJ" 636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 641 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 642 643 PetscFunctionBegin; 644 for (i=0; i<m; i++) { 645 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 646 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 647 if (idxm[i] >= rstart && idxm[i] < rend) { 648 row = idxm[i] - rstart; 649 for (j=0; j<n; j++) { 650 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 651 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 652 if (idxn[j] >= cstart && idxn[j] < cend) { 653 col = idxn[j] - cstart; 654 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 655 } else { 656 if (!aij->colmap) { 657 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 658 } 659 #if defined(PETSC_USE_CTABLE) 660 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 661 col--; 662 #else 663 col = aij->colmap[idxn[j]] - 1; 664 #endif 665 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 666 else { 667 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 668 } 669 } 670 } 671 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 672 } 673 PetscFunctionReturn(0); 674 } 675 676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 681 { 682 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 683 PetscErrorCode ierr; 684 PetscInt nstash,reallocs; 685 686 PetscFunctionBegin; 687 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 688 689 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 690 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 691 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 698 { 699 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 700 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 701 PetscErrorCode ierr; 702 PetscMPIInt n; 703 PetscInt i,j,rstart,ncols,flg; 704 PetscInt *row,*col; 705 PetscBool other_disassembled; 706 PetscScalar *val; 707 708 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 709 710 PetscFunctionBegin; 711 if (!aij->donotstash && !mat->nooffprocentries) { 712 while (1) { 713 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 714 if (!flg) break; 715 716 for (i=0; i<n; ) { 717 /* Now identify the consecutive vals belonging to the same row */ 718 for (j=i,rstart=row[j]; j<n; j++) { 719 if (row[j] != rstart) break; 720 } 721 if (j < n) ncols = j-i; 722 else ncols = n-i; 723 /* Now assemble all these values with a single function call */ 724 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 725 726 i = j; 727 } 728 } 729 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 730 } 731 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 733 734 /* determine if any processor has disassembled, if so we must 735 also disassemble ourselfs, in order that we may reassemble. */ 736 /* 737 if nonzero structure of submatrix B cannot change then we know that 738 no processor disassembled thus we can skip this stuff 739 */ 740 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 741 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 742 if (mat->was_assembled && !other_disassembled) { 743 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 744 } 745 } 746 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 747 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 748 } 749 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 750 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 752 753 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 754 755 aij->rowvalues = 0; 756 757 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 758 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 759 760 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 761 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 762 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 763 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 764 } 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 771 { 772 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 773 PetscErrorCode ierr; 774 775 PetscFunctionBegin; 776 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 777 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 778 PetscFunctionReturn(0); 779 } 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatZeroRows_MPIAIJ" 783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 784 { 785 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 786 PetscInt *lrows; 787 PetscInt r, len; 788 PetscErrorCode ierr; 789 790 PetscFunctionBegin; 791 /* get locally owned rows */ 792 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 793 /* fix right hand side if needed */ 794 if (x && b) { 795 const PetscScalar *xx; 796 PetscScalar *bb; 797 798 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 799 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 800 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 801 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 802 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 803 } 804 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 805 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 806 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 807 PetscBool cong; 808 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 809 if (cong) A->congruentlayouts = 1; 810 else A->congruentlayouts = 0; 811 } 812 if ((diag != 0.0) && A->congruentlayouts) { 813 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 814 } else if (diag != 0.0) { 815 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 816 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 817 for (r = 0; r < len; ++r) { 818 const PetscInt row = lrows[r] + A->rmap->rstart; 819 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 820 } 821 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 822 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 823 } else { 824 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 825 } 826 ierr = PetscFree(lrows);CHKERRQ(ierr); 827 828 /* only change matrix nonzero state if pattern was allowed to be changed */ 829 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 830 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 831 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 832 } 833 PetscFunctionReturn(0); 834 } 835 836 #undef __FUNCT__ 837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 839 { 840 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 841 PetscErrorCode ierr; 842 PetscMPIInt n = A->rmap->n; 843 PetscInt i,j,r,m,p = 0,len = 0; 844 PetscInt *lrows,*owners = A->rmap->range; 845 PetscSFNode *rrows; 846 PetscSF sf; 847 const PetscScalar *xx; 848 PetscScalar *bb,*mask; 849 Vec xmask,lmask; 850 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 851 const PetscInt *aj, *ii,*ridx; 852 PetscScalar *aa; 853 854 PetscFunctionBegin; 855 /* Create SF where leaves are input rows and roots are owned rows */ 856 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 857 for (r = 0; r < n; ++r) lrows[r] = -1; 858 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 859 for (r = 0; r < N; ++r) { 860 const PetscInt idx = rows[r]; 861 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 862 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 863 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 864 } 865 rrows[r].rank = p; 866 rrows[r].index = rows[r] - owners[p]; 867 } 868 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 869 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 870 /* Collect flags for rows to be zeroed */ 871 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 872 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 873 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 874 /* Compress and put in row numbers */ 875 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 876 /* zero diagonal part of matrix */ 877 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 878 /* handle off diagonal part of matrix */ 879 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 880 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 881 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 882 for (i=0; i<len; i++) bb[lrows[i]] = 1; 883 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 884 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 887 if (x) { 888 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 891 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 892 } 893 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 894 /* remove zeroed rows of off diagonal matrix */ 895 ii = aij->i; 896 for (i=0; i<len; i++) { 897 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 898 } 899 /* loop over all elements of off process part of matrix zeroing removed columns*/ 900 if (aij->compressedrow.use) { 901 m = aij->compressedrow.nrows; 902 ii = aij->compressedrow.i; 903 ridx = aij->compressedrow.rindex; 904 for (i=0; i<m; i++) { 905 n = ii[i+1] - ii[i]; 906 aj = aij->j + ii[i]; 907 aa = aij->a + ii[i]; 908 909 for (j=0; j<n; j++) { 910 if (PetscAbsScalar(mask[*aj])) { 911 if (b) bb[*ridx] -= *aa*xx[*aj]; 912 *aa = 0.0; 913 } 914 aa++; 915 aj++; 916 } 917 ridx++; 918 } 919 } else { /* do not use compressed row format */ 920 m = l->B->rmap->n; 921 for (i=0; i<m; i++) { 922 n = ii[i+1] - ii[i]; 923 aj = aij->j + ii[i]; 924 aa = aij->a + ii[i]; 925 for (j=0; j<n; j++) { 926 if (PetscAbsScalar(mask[*aj])) { 927 if (b) bb[i] -= *aa*xx[*aj]; 928 *aa = 0.0; 929 } 930 aa++; 931 aj++; 932 } 933 } 934 } 935 if (x) { 936 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 937 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 938 } 939 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 940 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 941 ierr = PetscFree(lrows);CHKERRQ(ierr); 942 943 /* only change matrix nonzero state if pattern was allowed to be changed */ 944 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 945 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 946 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 947 } 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatMult_MPIAIJ" 953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 954 { 955 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 956 PetscErrorCode ierr; 957 PetscInt nt; 958 959 PetscFunctionBegin; 960 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 961 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 962 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 964 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 965 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 972 { 973 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 974 PetscErrorCode ierr; 975 976 PetscFunctionBegin; 977 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatMultAdd_MPIAIJ" 983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 984 { 985 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 986 PetscErrorCode ierr; 987 988 PetscFunctionBegin; 989 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 990 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 991 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 998 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 999 { 1000 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1001 PetscErrorCode ierr; 1002 PetscBool merged; 1003 1004 PetscFunctionBegin; 1005 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1006 /* do nondiagonal part */ 1007 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1008 if (!merged) { 1009 /* send it on its way */ 1010 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1011 /* do local part */ 1012 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1013 /* receive remote parts: note this assumes the values are not actually */ 1014 /* added in yy until the next line, */ 1015 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1016 } else { 1017 /* do local part */ 1018 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1019 /* send it on its way */ 1020 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1021 /* values actually were received in the Begin() but we need to call this nop */ 1022 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1023 } 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1029 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1030 { 1031 MPI_Comm comm; 1032 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1033 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1034 IS Me,Notme; 1035 PetscErrorCode ierr; 1036 PetscInt M,N,first,last,*notme,i; 1037 PetscMPIInt size; 1038 1039 PetscFunctionBegin; 1040 /* Easy test: symmetric diagonal block */ 1041 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1042 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1043 if (!*f) PetscFunctionReturn(0); 1044 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1045 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1046 if (size == 1) PetscFunctionReturn(0); 1047 1048 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1049 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1050 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1051 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1052 for (i=0; i<first; i++) notme[i] = i; 1053 for (i=last; i<M; i++) notme[i-last+first] = i; 1054 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1055 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1056 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1057 Aoff = Aoffs[0]; 1058 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1059 Boff = Boffs[0]; 1060 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1061 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1062 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1063 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1064 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1065 ierr = PetscFree(notme);CHKERRQ(ierr); 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1072 { 1073 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1074 PetscErrorCode ierr; 1075 1076 PetscFunctionBegin; 1077 /* do nondiagonal part */ 1078 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1079 /* send it on its way */ 1080 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1081 /* do local part */ 1082 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1083 /* receive remote parts */ 1084 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 /* 1089 This only works correctly for square matrices where the subblock A->A is the 1090 diagonal block 1091 */ 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1095 { 1096 PetscErrorCode ierr; 1097 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1098 1099 PetscFunctionBegin; 1100 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1101 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1102 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatScale_MPIAIJ" 1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1109 { 1110 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1111 PetscErrorCode ierr; 1112 1113 PetscFunctionBegin; 1114 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1115 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatDestroy_MPIAIJ" 1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1122 { 1123 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 #if defined(PETSC_USE_LOG) 1128 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1129 #endif 1130 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1131 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1132 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1133 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1134 #if defined(PETSC_USE_CTABLE) 1135 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1136 #else 1137 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1138 #endif 1139 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1140 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1141 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1142 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1143 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1144 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1145 1146 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1147 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1148 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1149 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1151 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1152 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1153 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1154 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1155 #if defined(PETSC_HAVE_ELEMENTAL) 1156 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1157 #endif 1158 PetscFunctionReturn(0); 1159 } 1160 1161 #undef __FUNCT__ 1162 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1163 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1164 { 1165 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1166 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1167 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1168 PetscErrorCode ierr; 1169 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1170 int fd; 1171 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1172 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1173 PetscScalar *column_values; 1174 PetscInt message_count,flowcontrolcount; 1175 FILE *file; 1176 1177 PetscFunctionBegin; 1178 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1179 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1180 nz = A->nz + B->nz; 1181 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1182 if (!rank) { 1183 header[0] = MAT_FILE_CLASSID; 1184 header[1] = mat->rmap->N; 1185 header[2] = mat->cmap->N; 1186 1187 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1188 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1189 /* get largest number of rows any processor has */ 1190 rlen = mat->rmap->n; 1191 range = mat->rmap->range; 1192 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1193 } else { 1194 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1195 rlen = mat->rmap->n; 1196 } 1197 1198 /* load up the local row counts */ 1199 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1200 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1201 1202 /* store the row lengths to the file */ 1203 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1204 if (!rank) { 1205 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1206 for (i=1; i<size; i++) { 1207 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1208 rlen = range[i+1] - range[i]; 1209 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1210 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1211 } 1212 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1213 } else { 1214 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1215 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1216 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1217 } 1218 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1219 1220 /* load up the local column indices */ 1221 nzmax = nz; /* th processor needs space a largest processor needs */ 1222 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1223 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1224 cnt = 0; 1225 for (i=0; i<mat->rmap->n; i++) { 1226 for (j=B->i[i]; j<B->i[i+1]; j++) { 1227 if ((col = garray[B->j[j]]) > cstart) break; 1228 column_indices[cnt++] = col; 1229 } 1230 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1231 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1232 } 1233 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1234 1235 /* store the column indices to the file */ 1236 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1237 if (!rank) { 1238 MPI_Status status; 1239 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1240 for (i=1; i<size; i++) { 1241 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1242 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1243 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1244 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1245 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1246 } 1247 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1248 } else { 1249 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1250 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1251 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1252 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1253 } 1254 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1255 1256 /* load up the local column values */ 1257 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1258 cnt = 0; 1259 for (i=0; i<mat->rmap->n; i++) { 1260 for (j=B->i[i]; j<B->i[i+1]; j++) { 1261 if (garray[B->j[j]] > cstart) break; 1262 column_values[cnt++] = B->a[j]; 1263 } 1264 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1265 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1266 } 1267 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1268 1269 /* store the column values to the file */ 1270 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1271 if (!rank) { 1272 MPI_Status status; 1273 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1274 for (i=1; i<size; i++) { 1275 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1276 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1277 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1278 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1279 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1280 } 1281 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1282 } else { 1283 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1284 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1285 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1286 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1287 } 1288 ierr = PetscFree(column_values);CHKERRQ(ierr); 1289 1290 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1291 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 #include <petscdraw.h> 1296 #undef __FUNCT__ 1297 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1298 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1299 { 1300 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1301 PetscErrorCode ierr; 1302 PetscMPIInt rank = aij->rank,size = aij->size; 1303 PetscBool isdraw,iascii,isbinary; 1304 PetscViewer sviewer; 1305 PetscViewerFormat format; 1306 1307 PetscFunctionBegin; 1308 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1309 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1310 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1311 if (iascii) { 1312 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1313 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1314 MatInfo info; 1315 PetscBool inodes; 1316 1317 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1318 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1319 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1320 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1321 if (!inodes) { 1322 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1323 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1324 } else { 1325 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1326 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1327 } 1328 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1329 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1330 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1331 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1332 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1333 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1334 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1335 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1338 PetscInt inodecount,inodelimit,*inodes; 1339 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1340 if (inodes) { 1341 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1342 } else { 1343 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1344 } 1345 PetscFunctionReturn(0); 1346 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1347 PetscFunctionReturn(0); 1348 } 1349 } else if (isbinary) { 1350 if (size == 1) { 1351 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1352 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1353 } else { 1354 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1355 } 1356 PetscFunctionReturn(0); 1357 } else if (isdraw) { 1358 PetscDraw draw; 1359 PetscBool isnull; 1360 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1361 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1362 if (isnull) PetscFunctionReturn(0); 1363 } 1364 1365 { 1366 /* assemble the entire matrix onto first processor. */ 1367 Mat A; 1368 Mat_SeqAIJ *Aloc; 1369 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1370 MatScalar *a; 1371 1372 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1373 if (!rank) { 1374 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1375 } else { 1376 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1377 } 1378 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1379 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1380 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1381 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1382 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1383 1384 /* copy over the A part */ 1385 Aloc = (Mat_SeqAIJ*)aij->A->data; 1386 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1387 row = mat->rmap->rstart; 1388 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1389 for (i=0; i<m; i++) { 1390 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1391 row++; 1392 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1393 } 1394 aj = Aloc->j; 1395 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1396 1397 /* copy over the B part */ 1398 Aloc = (Mat_SeqAIJ*)aij->B->data; 1399 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1400 row = mat->rmap->rstart; 1401 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1402 ct = cols; 1403 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1404 for (i=0; i<m; i++) { 1405 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1406 row++; 1407 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1408 } 1409 ierr = PetscFree(ct);CHKERRQ(ierr); 1410 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1411 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1412 /* 1413 Everyone has to call to draw the matrix since the graphics waits are 1414 synchronized across all processors that share the PetscDraw object 1415 */ 1416 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1417 if (!rank) { 1418 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1419 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1420 } 1421 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1422 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1423 ierr = MatDestroy(&A);CHKERRQ(ierr); 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatView_MPIAIJ" 1430 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1431 { 1432 PetscErrorCode ierr; 1433 PetscBool iascii,isdraw,issocket,isbinary; 1434 1435 PetscFunctionBegin; 1436 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1437 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1438 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1439 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1440 if (iascii || isdraw || isbinary || issocket) { 1441 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1442 } 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatSOR_MPIAIJ" 1448 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1449 { 1450 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1451 PetscErrorCode ierr; 1452 Vec bb1 = 0; 1453 PetscBool hasop; 1454 1455 PetscFunctionBegin; 1456 if (flag == SOR_APPLY_UPPER) { 1457 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1458 PetscFunctionReturn(0); 1459 } 1460 1461 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1462 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1463 } 1464 1465 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1466 if (flag & SOR_ZERO_INITIAL_GUESS) { 1467 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1468 its--; 1469 } 1470 1471 while (its--) { 1472 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1473 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1474 1475 /* update rhs: bb1 = bb - B*x */ 1476 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1477 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1478 1479 /* local sweep */ 1480 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1481 } 1482 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1483 if (flag & SOR_ZERO_INITIAL_GUESS) { 1484 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1485 its--; 1486 } 1487 while (its--) { 1488 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1489 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1490 1491 /* update rhs: bb1 = bb - B*x */ 1492 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1493 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1494 1495 /* local sweep */ 1496 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1497 } 1498 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1499 if (flag & SOR_ZERO_INITIAL_GUESS) { 1500 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1501 its--; 1502 } 1503 while (its--) { 1504 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1505 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1506 1507 /* update rhs: bb1 = bb - B*x */ 1508 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1509 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1510 1511 /* local sweep */ 1512 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1513 } 1514 } else if (flag & SOR_EISENSTAT) { 1515 Vec xx1; 1516 1517 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1518 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1519 1520 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1521 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1522 if (!mat->diag) { 1523 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1524 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1525 } 1526 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1527 if (hasop) { 1528 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1529 } else { 1530 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1531 } 1532 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1533 1534 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1535 1536 /* local sweep */ 1537 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1538 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1539 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1540 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1541 1542 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1543 1544 matin->errortype = mat->A->errortype; 1545 PetscFunctionReturn(0); 1546 } 1547 1548 #undef __FUNCT__ 1549 #define __FUNCT__ "MatPermute_MPIAIJ" 1550 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1551 { 1552 Mat aA,aB,Aperm; 1553 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1554 PetscScalar *aa,*ba; 1555 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1556 PetscSF rowsf,sf; 1557 IS parcolp = NULL; 1558 PetscBool done; 1559 PetscErrorCode ierr; 1560 1561 PetscFunctionBegin; 1562 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1563 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1564 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1565 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1566 1567 /* Invert row permutation to find out where my rows should go */ 1568 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1569 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1570 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1571 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1572 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1573 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1574 1575 /* Invert column permutation to find out where my columns should go */ 1576 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1577 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1578 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1579 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1580 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1581 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1582 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1583 1584 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1585 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1586 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1587 1588 /* Find out where my gcols should go */ 1589 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1590 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1591 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1592 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1593 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1594 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1595 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1596 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1597 1598 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1599 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1600 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1601 for (i=0; i<m; i++) { 1602 PetscInt row = rdest[i],rowner; 1603 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1604 for (j=ai[i]; j<ai[i+1]; j++) { 1605 PetscInt cowner,col = cdest[aj[j]]; 1606 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1607 if (rowner == cowner) dnnz[i]++; 1608 else onnz[i]++; 1609 } 1610 for (j=bi[i]; j<bi[i+1]; j++) { 1611 PetscInt cowner,col = gcdest[bj[j]]; 1612 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1613 if (rowner == cowner) dnnz[i]++; 1614 else onnz[i]++; 1615 } 1616 } 1617 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1618 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1619 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1620 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1621 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1622 1623 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1624 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1625 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1626 for (i=0; i<m; i++) { 1627 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1628 PetscInt j0,rowlen; 1629 rowlen = ai[i+1] - ai[i]; 1630 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1631 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1632 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1633 } 1634 rowlen = bi[i+1] - bi[i]; 1635 for (j0=j=0; j<rowlen; j0=j) { 1636 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1637 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1638 } 1639 } 1640 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1641 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1642 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1643 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1644 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1645 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1646 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1647 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1648 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1649 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1650 *B = Aperm; 1651 PetscFunctionReturn(0); 1652 } 1653 1654 #undef __FUNCT__ 1655 #define __FUNCT__ "MatGetGhosts_MPIAIJ" 1656 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1657 { 1658 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1659 PetscErrorCode ierr; 1660 1661 PetscFunctionBegin; 1662 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1663 if (ghosts) *ghosts = aij->garray; 1664 PetscFunctionReturn(0); 1665 } 1666 1667 #undef __FUNCT__ 1668 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1669 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1670 { 1671 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1672 Mat A = mat->A,B = mat->B; 1673 PetscErrorCode ierr; 1674 PetscReal isend[5],irecv[5]; 1675 1676 PetscFunctionBegin; 1677 info->block_size = 1.0; 1678 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1679 1680 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1681 isend[3] = info->memory; isend[4] = info->mallocs; 1682 1683 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1684 1685 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1686 isend[3] += info->memory; isend[4] += info->mallocs; 1687 if (flag == MAT_LOCAL) { 1688 info->nz_used = isend[0]; 1689 info->nz_allocated = isend[1]; 1690 info->nz_unneeded = isend[2]; 1691 info->memory = isend[3]; 1692 info->mallocs = isend[4]; 1693 } else if (flag == MAT_GLOBAL_MAX) { 1694 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1695 1696 info->nz_used = irecv[0]; 1697 info->nz_allocated = irecv[1]; 1698 info->nz_unneeded = irecv[2]; 1699 info->memory = irecv[3]; 1700 info->mallocs = irecv[4]; 1701 } else if (flag == MAT_GLOBAL_SUM) { 1702 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1703 1704 info->nz_used = irecv[0]; 1705 info->nz_allocated = irecv[1]; 1706 info->nz_unneeded = irecv[2]; 1707 info->memory = irecv[3]; 1708 info->mallocs = irecv[4]; 1709 } 1710 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1711 info->fill_ratio_needed = 0; 1712 info->factor_mallocs = 0; 1713 PetscFunctionReturn(0); 1714 } 1715 1716 #undef __FUNCT__ 1717 #define __FUNCT__ "MatSetOption_MPIAIJ" 1718 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1719 { 1720 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1721 PetscErrorCode ierr; 1722 1723 PetscFunctionBegin; 1724 switch (op) { 1725 case MAT_NEW_NONZERO_LOCATIONS: 1726 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1727 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1728 case MAT_KEEP_NONZERO_PATTERN: 1729 case MAT_NEW_NONZERO_LOCATION_ERR: 1730 case MAT_USE_INODES: 1731 case MAT_IGNORE_ZERO_ENTRIES: 1732 MatCheckPreallocated(A,1); 1733 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1734 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1735 break; 1736 case MAT_ROW_ORIENTED: 1737 MatCheckPreallocated(A,1); 1738 a->roworiented = flg; 1739 1740 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1741 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1742 break; 1743 case MAT_NEW_DIAGONALS: 1744 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1745 break; 1746 case MAT_IGNORE_OFF_PROC_ENTRIES: 1747 a->donotstash = flg; 1748 break; 1749 case MAT_SPD: 1750 A->spd_set = PETSC_TRUE; 1751 A->spd = flg; 1752 if (flg) { 1753 A->symmetric = PETSC_TRUE; 1754 A->structurally_symmetric = PETSC_TRUE; 1755 A->symmetric_set = PETSC_TRUE; 1756 A->structurally_symmetric_set = PETSC_TRUE; 1757 } 1758 break; 1759 case MAT_SYMMETRIC: 1760 MatCheckPreallocated(A,1); 1761 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1762 break; 1763 case MAT_STRUCTURALLY_SYMMETRIC: 1764 MatCheckPreallocated(A,1); 1765 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1766 break; 1767 case MAT_HERMITIAN: 1768 MatCheckPreallocated(A,1); 1769 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1770 break; 1771 case MAT_SYMMETRY_ETERNAL: 1772 MatCheckPreallocated(A,1); 1773 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1774 break; 1775 default: 1776 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1777 } 1778 PetscFunctionReturn(0); 1779 } 1780 1781 #undef __FUNCT__ 1782 #define __FUNCT__ "MatGetRow_MPIAIJ" 1783 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1784 { 1785 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1786 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1787 PetscErrorCode ierr; 1788 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1789 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1790 PetscInt *cmap,*idx_p; 1791 1792 PetscFunctionBegin; 1793 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1794 mat->getrowactive = PETSC_TRUE; 1795 1796 if (!mat->rowvalues && (idx || v)) { 1797 /* 1798 allocate enough space to hold information from the longest row. 1799 */ 1800 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1801 PetscInt max = 1,tmp; 1802 for (i=0; i<matin->rmap->n; i++) { 1803 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1804 if (max < tmp) max = tmp; 1805 } 1806 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1807 } 1808 1809 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1810 lrow = row - rstart; 1811 1812 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1813 if (!v) {pvA = 0; pvB = 0;} 1814 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1815 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1816 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1817 nztot = nzA + nzB; 1818 1819 cmap = mat->garray; 1820 if (v || idx) { 1821 if (nztot) { 1822 /* Sort by increasing column numbers, assuming A and B already sorted */ 1823 PetscInt imark = -1; 1824 if (v) { 1825 *v = v_p = mat->rowvalues; 1826 for (i=0; i<nzB; i++) { 1827 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1828 else break; 1829 } 1830 imark = i; 1831 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1832 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1833 } 1834 if (idx) { 1835 *idx = idx_p = mat->rowindices; 1836 if (imark > -1) { 1837 for (i=0; i<imark; i++) { 1838 idx_p[i] = cmap[cworkB[i]]; 1839 } 1840 } else { 1841 for (i=0; i<nzB; i++) { 1842 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1843 else break; 1844 } 1845 imark = i; 1846 } 1847 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1848 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1849 } 1850 } else { 1851 if (idx) *idx = 0; 1852 if (v) *v = 0; 1853 } 1854 } 1855 *nz = nztot; 1856 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1857 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1858 PetscFunctionReturn(0); 1859 } 1860 1861 #undef __FUNCT__ 1862 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1863 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1864 { 1865 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1866 1867 PetscFunctionBegin; 1868 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1869 aij->getrowactive = PETSC_FALSE; 1870 PetscFunctionReturn(0); 1871 } 1872 1873 #undef __FUNCT__ 1874 #define __FUNCT__ "MatNorm_MPIAIJ" 1875 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1876 { 1877 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1878 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1879 PetscErrorCode ierr; 1880 PetscInt i,j,cstart = mat->cmap->rstart; 1881 PetscReal sum = 0.0; 1882 MatScalar *v; 1883 1884 PetscFunctionBegin; 1885 if (aij->size == 1) { 1886 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1887 } else { 1888 if (type == NORM_FROBENIUS) { 1889 v = amat->a; 1890 for (i=0; i<amat->nz; i++) { 1891 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1892 } 1893 v = bmat->a; 1894 for (i=0; i<bmat->nz; i++) { 1895 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1896 } 1897 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1898 *norm = PetscSqrtReal(*norm); 1899 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1900 } else if (type == NORM_1) { /* max column norm */ 1901 PetscReal *tmp,*tmp2; 1902 PetscInt *jj,*garray = aij->garray; 1903 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1904 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1905 *norm = 0.0; 1906 v = amat->a; jj = amat->j; 1907 for (j=0; j<amat->nz; j++) { 1908 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1909 } 1910 v = bmat->a; jj = bmat->j; 1911 for (j=0; j<bmat->nz; j++) { 1912 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1913 } 1914 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1915 for (j=0; j<mat->cmap->N; j++) { 1916 if (tmp2[j] > *norm) *norm = tmp2[j]; 1917 } 1918 ierr = PetscFree(tmp);CHKERRQ(ierr); 1919 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1920 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1921 } else if (type == NORM_INFINITY) { /* max row norm */ 1922 PetscReal ntemp = 0.0; 1923 for (j=0; j<aij->A->rmap->n; j++) { 1924 v = amat->a + amat->i[j]; 1925 sum = 0.0; 1926 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1927 sum += PetscAbsScalar(*v); v++; 1928 } 1929 v = bmat->a + bmat->i[j]; 1930 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1931 sum += PetscAbsScalar(*v); v++; 1932 } 1933 if (sum > ntemp) ntemp = sum; 1934 } 1935 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1936 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1937 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1938 } 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatTranspose_MPIAIJ" 1944 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1945 { 1946 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1947 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1948 PetscErrorCode ierr; 1949 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1950 PetscInt cstart = A->cmap->rstart,ncol; 1951 Mat B; 1952 MatScalar *array; 1953 1954 PetscFunctionBegin; 1955 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1956 1957 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1958 ai = Aloc->i; aj = Aloc->j; 1959 bi = Bloc->i; bj = Bloc->j; 1960 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1961 PetscInt *d_nnz,*g_nnz,*o_nnz; 1962 PetscSFNode *oloc; 1963 PETSC_UNUSED PetscSF sf; 1964 1965 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1966 /* compute d_nnz for preallocation */ 1967 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1968 for (i=0; i<ai[ma]; i++) { 1969 d_nnz[aj[i]]++; 1970 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1971 } 1972 /* compute local off-diagonal contributions */ 1973 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1974 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1975 /* map those to global */ 1976 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1977 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1978 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1979 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1980 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1981 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1982 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1983 1984 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1985 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1986 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1987 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1988 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1989 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1990 } else { 1991 B = *matout; 1992 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1993 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1994 } 1995 1996 /* copy over the A part */ 1997 array = Aloc->a; 1998 row = A->rmap->rstart; 1999 for (i=0; i<ma; i++) { 2000 ncol = ai[i+1]-ai[i]; 2001 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2002 row++; 2003 array += ncol; aj += ncol; 2004 } 2005 aj = Aloc->j; 2006 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2007 2008 /* copy over the B part */ 2009 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2010 array = Bloc->a; 2011 row = A->rmap->rstart; 2012 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2013 cols_tmp = cols; 2014 for (i=0; i<mb; i++) { 2015 ncol = bi[i+1]-bi[i]; 2016 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2017 row++; 2018 array += ncol; cols_tmp += ncol; 2019 } 2020 ierr = PetscFree(cols);CHKERRQ(ierr); 2021 2022 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2023 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2024 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2025 *matout = B; 2026 } else { 2027 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2028 } 2029 PetscFunctionReturn(0); 2030 } 2031 2032 #undef __FUNCT__ 2033 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2034 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2035 { 2036 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2037 Mat a = aij->A,b = aij->B; 2038 PetscErrorCode ierr; 2039 PetscInt s1,s2,s3; 2040 2041 PetscFunctionBegin; 2042 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2043 if (rr) { 2044 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2045 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2046 /* Overlap communication with computation. */ 2047 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2048 } 2049 if (ll) { 2050 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2051 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2052 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2053 } 2054 /* scale the diagonal block */ 2055 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2056 2057 if (rr) { 2058 /* Do a scatter end and then right scale the off-diagonal block */ 2059 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2060 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2061 } 2062 PetscFunctionReturn(0); 2063 } 2064 2065 #undef __FUNCT__ 2066 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2067 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2068 { 2069 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2070 PetscErrorCode ierr; 2071 2072 PetscFunctionBegin; 2073 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2074 PetscFunctionReturn(0); 2075 } 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatEqual_MPIAIJ" 2079 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2080 { 2081 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2082 Mat a,b,c,d; 2083 PetscBool flg; 2084 PetscErrorCode ierr; 2085 2086 PetscFunctionBegin; 2087 a = matA->A; b = matA->B; 2088 c = matB->A; d = matB->B; 2089 2090 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2091 if (flg) { 2092 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2093 } 2094 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2095 PetscFunctionReturn(0); 2096 } 2097 2098 #undef __FUNCT__ 2099 #define __FUNCT__ "MatCopy_MPIAIJ" 2100 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2101 { 2102 PetscErrorCode ierr; 2103 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2104 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2105 2106 PetscFunctionBegin; 2107 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2108 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2109 /* because of the column compression in the off-processor part of the matrix a->B, 2110 the number of columns in a->B and b->B may be different, hence we cannot call 2111 the MatCopy() directly on the two parts. If need be, we can provide a more 2112 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2113 then copying the submatrices */ 2114 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2115 } else { 2116 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2117 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2118 } 2119 PetscFunctionReturn(0); 2120 } 2121 2122 #undef __FUNCT__ 2123 #define __FUNCT__ "MatSetUp_MPIAIJ" 2124 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2125 { 2126 PetscErrorCode ierr; 2127 2128 PetscFunctionBegin; 2129 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2130 PetscFunctionReturn(0); 2131 } 2132 2133 /* 2134 Computes the number of nonzeros per row needed for preallocation when X and Y 2135 have different nonzero structure. 2136 */ 2137 #undef __FUNCT__ 2138 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2139 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2140 { 2141 PetscInt i,j,k,nzx,nzy; 2142 2143 PetscFunctionBegin; 2144 /* Set the number of nonzeros in the new matrix */ 2145 for (i=0; i<m; i++) { 2146 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2147 nzx = xi[i+1] - xi[i]; 2148 nzy = yi[i+1] - yi[i]; 2149 nnz[i] = 0; 2150 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2151 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2152 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2153 nnz[i]++; 2154 } 2155 for (; k<nzy; k++) nnz[i]++; 2156 } 2157 PetscFunctionReturn(0); 2158 } 2159 2160 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2163 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2164 { 2165 PetscErrorCode ierr; 2166 PetscInt m = Y->rmap->N; 2167 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2168 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2169 2170 PetscFunctionBegin; 2171 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2172 PetscFunctionReturn(0); 2173 } 2174 2175 #undef __FUNCT__ 2176 #define __FUNCT__ "MatAXPY_MPIAIJ" 2177 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2178 { 2179 PetscErrorCode ierr; 2180 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2181 PetscBLASInt bnz,one=1; 2182 Mat_SeqAIJ *x,*y; 2183 2184 PetscFunctionBegin; 2185 if (str == SAME_NONZERO_PATTERN) { 2186 PetscScalar alpha = a; 2187 x = (Mat_SeqAIJ*)xx->A->data; 2188 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2189 y = (Mat_SeqAIJ*)yy->A->data; 2190 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2191 x = (Mat_SeqAIJ*)xx->B->data; 2192 y = (Mat_SeqAIJ*)yy->B->data; 2193 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2194 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2195 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2196 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2197 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2198 } else { 2199 Mat B; 2200 PetscInt *nnz_d,*nnz_o; 2201 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2202 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2203 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2204 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2205 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2206 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2207 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2208 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2209 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2210 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2211 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2212 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2213 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2214 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2215 } 2216 PetscFunctionReturn(0); 2217 } 2218 2219 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2220 2221 #undef __FUNCT__ 2222 #define __FUNCT__ "MatConjugate_MPIAIJ" 2223 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2224 { 2225 #if defined(PETSC_USE_COMPLEX) 2226 PetscErrorCode ierr; 2227 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2228 2229 PetscFunctionBegin; 2230 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2231 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2232 #else 2233 PetscFunctionBegin; 2234 #endif 2235 PetscFunctionReturn(0); 2236 } 2237 2238 #undef __FUNCT__ 2239 #define __FUNCT__ "MatRealPart_MPIAIJ" 2240 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2241 { 2242 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2243 PetscErrorCode ierr; 2244 2245 PetscFunctionBegin; 2246 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2247 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2248 PetscFunctionReturn(0); 2249 } 2250 2251 #undef __FUNCT__ 2252 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2253 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2254 { 2255 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2256 PetscErrorCode ierr; 2257 2258 PetscFunctionBegin; 2259 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2260 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2261 PetscFunctionReturn(0); 2262 } 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2266 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2267 { 2268 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2269 PetscErrorCode ierr; 2270 PetscInt i,*idxb = 0; 2271 PetscScalar *va,*vb; 2272 Vec vtmp; 2273 2274 PetscFunctionBegin; 2275 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2276 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2277 if (idx) { 2278 for (i=0; i<A->rmap->n; i++) { 2279 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2280 } 2281 } 2282 2283 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2284 if (idx) { 2285 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2286 } 2287 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2288 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2289 2290 for (i=0; i<A->rmap->n; i++) { 2291 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2292 va[i] = vb[i]; 2293 if (idx) idx[i] = a->garray[idxb[i]]; 2294 } 2295 } 2296 2297 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2298 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2299 ierr = PetscFree(idxb);CHKERRQ(ierr); 2300 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 #undef __FUNCT__ 2305 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2306 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2307 { 2308 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2309 PetscErrorCode ierr; 2310 PetscInt i,*idxb = 0; 2311 PetscScalar *va,*vb; 2312 Vec vtmp; 2313 2314 PetscFunctionBegin; 2315 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2316 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2317 if (idx) { 2318 for (i=0; i<A->cmap->n; i++) { 2319 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2320 } 2321 } 2322 2323 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2324 if (idx) { 2325 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2326 } 2327 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2328 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2329 2330 for (i=0; i<A->rmap->n; i++) { 2331 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2332 va[i] = vb[i]; 2333 if (idx) idx[i] = a->garray[idxb[i]]; 2334 } 2335 } 2336 2337 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2338 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2339 ierr = PetscFree(idxb);CHKERRQ(ierr); 2340 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2341 PetscFunctionReturn(0); 2342 } 2343 2344 #undef __FUNCT__ 2345 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2346 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2347 { 2348 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2349 PetscInt n = A->rmap->n; 2350 PetscInt cstart = A->cmap->rstart; 2351 PetscInt *cmap = mat->garray; 2352 PetscInt *diagIdx, *offdiagIdx; 2353 Vec diagV, offdiagV; 2354 PetscScalar *a, *diagA, *offdiagA; 2355 PetscInt r; 2356 PetscErrorCode ierr; 2357 2358 PetscFunctionBegin; 2359 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2360 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2361 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2362 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2363 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2364 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2365 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2366 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2367 for (r = 0; r < n; ++r) { 2368 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2369 a[r] = diagA[r]; 2370 idx[r] = cstart + diagIdx[r]; 2371 } else { 2372 a[r] = offdiagA[r]; 2373 idx[r] = cmap[offdiagIdx[r]]; 2374 } 2375 } 2376 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2377 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2378 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2379 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2380 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2381 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2382 PetscFunctionReturn(0); 2383 } 2384 2385 #undef __FUNCT__ 2386 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2387 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2388 { 2389 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2390 PetscInt n = A->rmap->n; 2391 PetscInt cstart = A->cmap->rstart; 2392 PetscInt *cmap = mat->garray; 2393 PetscInt *diagIdx, *offdiagIdx; 2394 Vec diagV, offdiagV; 2395 PetscScalar *a, *diagA, *offdiagA; 2396 PetscInt r; 2397 PetscErrorCode ierr; 2398 2399 PetscFunctionBegin; 2400 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2401 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2402 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2403 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2404 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2405 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2406 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2407 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2408 for (r = 0; r < n; ++r) { 2409 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2410 a[r] = diagA[r]; 2411 idx[r] = cstart + diagIdx[r]; 2412 } else { 2413 a[r] = offdiagA[r]; 2414 idx[r] = cmap[offdiagIdx[r]]; 2415 } 2416 } 2417 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2418 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2419 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2420 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2421 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2422 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2423 PetscFunctionReturn(0); 2424 } 2425 2426 #undef __FUNCT__ 2427 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2428 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2429 { 2430 PetscErrorCode ierr; 2431 Mat *dummy; 2432 2433 PetscFunctionBegin; 2434 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2435 *newmat = *dummy; 2436 ierr = PetscFree(dummy);CHKERRQ(ierr); 2437 PetscFunctionReturn(0); 2438 } 2439 2440 #undef __FUNCT__ 2441 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2442 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2443 { 2444 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2445 PetscErrorCode ierr; 2446 2447 PetscFunctionBegin; 2448 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2449 A->errortype = a->A->errortype; 2450 PetscFunctionReturn(0); 2451 } 2452 2453 #undef __FUNCT__ 2454 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2455 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2456 { 2457 PetscErrorCode ierr; 2458 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2459 2460 PetscFunctionBegin; 2461 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2462 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2463 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2464 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2465 PetscFunctionReturn(0); 2466 } 2467 2468 #undef __FUNCT__ 2469 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ" 2470 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2471 { 2472 PetscFunctionBegin; 2473 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2474 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2475 PetscFunctionReturn(0); 2476 } 2477 2478 #undef __FUNCT__ 2479 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap" 2480 /*@ 2481 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2482 2483 Collective on Mat 2484 2485 Input Parameters: 2486 + A - the matrix 2487 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2488 2489 Level: advanced 2490 2491 @*/ 2492 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2493 { 2494 PetscErrorCode ierr; 2495 2496 PetscFunctionBegin; 2497 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2498 PetscFunctionReturn(0); 2499 } 2500 2501 #undef __FUNCT__ 2502 #define __FUNCT__ "MatSetFromOptions_MPIAIJ" 2503 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2504 { 2505 PetscErrorCode ierr; 2506 PetscBool sc = PETSC_FALSE,flg; 2507 2508 PetscFunctionBegin; 2509 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2510 ierr = PetscObjectOptionsBegin((PetscObject)A); 2511 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2512 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2513 if (flg) { 2514 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2515 } 2516 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2517 PetscFunctionReturn(0); 2518 } 2519 2520 #undef __FUNCT__ 2521 #define __FUNCT__ "MatShift_MPIAIJ" 2522 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2523 { 2524 PetscErrorCode ierr; 2525 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2526 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2527 2528 PetscFunctionBegin; 2529 if (!Y->preallocated) { 2530 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2531 } else if (!aij->nz) { 2532 PetscInt nonew = aij->nonew; 2533 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2534 aij->nonew = nonew; 2535 } 2536 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2537 PetscFunctionReturn(0); 2538 } 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ" 2542 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2543 { 2544 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2545 PetscErrorCode ierr; 2546 2547 PetscFunctionBegin; 2548 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2549 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2550 if (d) { 2551 PetscInt rstart; 2552 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2553 *d += rstart; 2554 2555 } 2556 PetscFunctionReturn(0); 2557 } 2558 2559 2560 /* -------------------------------------------------------------------*/ 2561 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2562 MatGetRow_MPIAIJ, 2563 MatRestoreRow_MPIAIJ, 2564 MatMult_MPIAIJ, 2565 /* 4*/ MatMultAdd_MPIAIJ, 2566 MatMultTranspose_MPIAIJ, 2567 MatMultTransposeAdd_MPIAIJ, 2568 0, 2569 0, 2570 0, 2571 /*10*/ 0, 2572 0, 2573 0, 2574 MatSOR_MPIAIJ, 2575 MatTranspose_MPIAIJ, 2576 /*15*/ MatGetInfo_MPIAIJ, 2577 MatEqual_MPIAIJ, 2578 MatGetDiagonal_MPIAIJ, 2579 MatDiagonalScale_MPIAIJ, 2580 MatNorm_MPIAIJ, 2581 /*20*/ MatAssemblyBegin_MPIAIJ, 2582 MatAssemblyEnd_MPIAIJ, 2583 MatSetOption_MPIAIJ, 2584 MatZeroEntries_MPIAIJ, 2585 /*24*/ MatZeroRows_MPIAIJ, 2586 0, 2587 0, 2588 0, 2589 0, 2590 /*29*/ MatSetUp_MPIAIJ, 2591 0, 2592 0, 2593 0, 2594 0, 2595 /*34*/ MatDuplicate_MPIAIJ, 2596 0, 2597 0, 2598 0, 2599 0, 2600 /*39*/ MatAXPY_MPIAIJ, 2601 MatGetSubMatrices_MPIAIJ, 2602 MatIncreaseOverlap_MPIAIJ, 2603 MatGetValues_MPIAIJ, 2604 MatCopy_MPIAIJ, 2605 /*44*/ MatGetRowMax_MPIAIJ, 2606 MatScale_MPIAIJ, 2607 MatShift_MPIAIJ, 2608 MatDiagonalSet_MPIAIJ, 2609 MatZeroRowsColumns_MPIAIJ, 2610 /*49*/ MatSetRandom_MPIAIJ, 2611 0, 2612 0, 2613 0, 2614 0, 2615 /*54*/ MatFDColoringCreate_MPIXAIJ, 2616 0, 2617 MatSetUnfactored_MPIAIJ, 2618 MatPermute_MPIAIJ, 2619 0, 2620 /*59*/ MatGetSubMatrix_MPIAIJ, 2621 MatDestroy_MPIAIJ, 2622 MatView_MPIAIJ, 2623 0, 2624 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2625 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2626 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2627 0, 2628 0, 2629 0, 2630 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2631 MatGetRowMinAbs_MPIAIJ, 2632 0, 2633 MatSetColoring_MPIAIJ, 2634 0, 2635 MatSetValuesAdifor_MPIAIJ, 2636 /*75*/ MatFDColoringApply_AIJ, 2637 MatSetFromOptions_MPIAIJ, 2638 0, 2639 0, 2640 MatFindZeroDiagonals_MPIAIJ, 2641 /*80*/ 0, 2642 0, 2643 0, 2644 /*83*/ MatLoad_MPIAIJ, 2645 0, 2646 0, 2647 0, 2648 0, 2649 0, 2650 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2651 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2652 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2653 MatPtAP_MPIAIJ_MPIAIJ, 2654 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2655 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2656 0, 2657 0, 2658 0, 2659 0, 2660 /*99*/ 0, 2661 0, 2662 0, 2663 MatConjugate_MPIAIJ, 2664 0, 2665 /*104*/MatSetValuesRow_MPIAIJ, 2666 MatRealPart_MPIAIJ, 2667 MatImaginaryPart_MPIAIJ, 2668 0, 2669 0, 2670 /*109*/0, 2671 0, 2672 MatGetRowMin_MPIAIJ, 2673 0, 2674 MatMissingDiagonal_MPIAIJ, 2675 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2676 0, 2677 MatGetGhosts_MPIAIJ, 2678 0, 2679 0, 2680 /*119*/0, 2681 0, 2682 0, 2683 0, 2684 MatGetMultiProcBlock_MPIAIJ, 2685 /*124*/MatFindNonzeroRows_MPIAIJ, 2686 MatGetColumnNorms_MPIAIJ, 2687 MatInvertBlockDiagonal_MPIAIJ, 2688 0, 2689 MatGetSubMatricesMPI_MPIAIJ, 2690 /*129*/0, 2691 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2692 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2693 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2694 0, 2695 /*134*/0, 2696 0, 2697 0, 2698 0, 2699 0, 2700 /*139*/0, 2701 0, 2702 0, 2703 MatFDColoringSetUp_MPIXAIJ, 2704 MatFindOffBlockDiagonalEntries_MPIAIJ, 2705 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2706 }; 2707 2708 /* ----------------------------------------------------------------------------------------*/ 2709 2710 #undef __FUNCT__ 2711 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2712 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2713 { 2714 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2715 PetscErrorCode ierr; 2716 2717 PetscFunctionBegin; 2718 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2719 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2720 PetscFunctionReturn(0); 2721 } 2722 2723 #undef __FUNCT__ 2724 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2725 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2726 { 2727 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2728 PetscErrorCode ierr; 2729 2730 PetscFunctionBegin; 2731 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2732 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2733 PetscFunctionReturn(0); 2734 } 2735 2736 #undef __FUNCT__ 2737 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2738 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2739 { 2740 Mat_MPIAIJ *b; 2741 PetscErrorCode ierr; 2742 2743 PetscFunctionBegin; 2744 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2745 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2746 b = (Mat_MPIAIJ*)B->data; 2747 2748 if (!B->preallocated) { 2749 /* Explicitly create 2 MATSEQAIJ matrices. */ 2750 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2751 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2752 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2753 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2754 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2755 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2756 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2757 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2758 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2759 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2760 } 2761 2762 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2763 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2764 B->preallocated = PETSC_TRUE; 2765 PetscFunctionReturn(0); 2766 } 2767 2768 #undef __FUNCT__ 2769 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2770 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2771 { 2772 Mat mat; 2773 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2774 PetscErrorCode ierr; 2775 2776 PetscFunctionBegin; 2777 *newmat = 0; 2778 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2779 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2780 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2781 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2782 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2783 a = (Mat_MPIAIJ*)mat->data; 2784 2785 mat->factortype = matin->factortype; 2786 mat->assembled = PETSC_TRUE; 2787 mat->insertmode = NOT_SET_VALUES; 2788 mat->preallocated = PETSC_TRUE; 2789 2790 a->size = oldmat->size; 2791 a->rank = oldmat->rank; 2792 a->donotstash = oldmat->donotstash; 2793 a->roworiented = oldmat->roworiented; 2794 a->rowindices = 0; 2795 a->rowvalues = 0; 2796 a->getrowactive = PETSC_FALSE; 2797 2798 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2799 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2800 2801 if (oldmat->colmap) { 2802 #if defined(PETSC_USE_CTABLE) 2803 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2804 #else 2805 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2806 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2807 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2808 #endif 2809 } else a->colmap = 0; 2810 if (oldmat->garray) { 2811 PetscInt len; 2812 len = oldmat->B->cmap->n; 2813 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2814 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2815 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2816 } else a->garray = 0; 2817 2818 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2819 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2820 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2821 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2822 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2823 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2824 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2825 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2826 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2827 *newmat = mat; 2828 PetscFunctionReturn(0); 2829 } 2830 2831 2832 2833 #undef __FUNCT__ 2834 #define __FUNCT__ "MatLoad_MPIAIJ" 2835 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2836 { 2837 PetscScalar *vals,*svals; 2838 MPI_Comm comm; 2839 PetscErrorCode ierr; 2840 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2841 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2842 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2843 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2844 PetscInt cend,cstart,n,*rowners; 2845 int fd; 2846 PetscInt bs = newMat->rmap->bs; 2847 2848 PetscFunctionBegin; 2849 /* force binary viewer to load .info file if it has not yet done so */ 2850 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2851 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2852 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2853 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2854 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2855 if (!rank) { 2856 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2857 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2858 } 2859 2860 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr); 2861 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2862 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2863 if (bs < 0) bs = 1; 2864 2865 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2866 M = header[1]; N = header[2]; 2867 2868 /* If global sizes are set, check if they are consistent with that given in the file */ 2869 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2870 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2871 2872 /* determine ownership of all (block) rows */ 2873 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2874 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2875 else m = newMat->rmap->n; /* Set by user */ 2876 2877 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2878 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2879 2880 /* First process needs enough room for process with most rows */ 2881 if (!rank) { 2882 mmax = rowners[1]; 2883 for (i=2; i<=size; i++) { 2884 mmax = PetscMax(mmax, rowners[i]); 2885 } 2886 } else mmax = -1; /* unused, but compilers complain */ 2887 2888 rowners[0] = 0; 2889 for (i=2; i<=size; i++) { 2890 rowners[i] += rowners[i-1]; 2891 } 2892 rstart = rowners[rank]; 2893 rend = rowners[rank+1]; 2894 2895 /* distribute row lengths to all processors */ 2896 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2897 if (!rank) { 2898 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2899 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2900 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2901 for (j=0; j<m; j++) { 2902 procsnz[0] += ourlens[j]; 2903 } 2904 for (i=1; i<size; i++) { 2905 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2906 /* calculate the number of nonzeros on each processor */ 2907 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2908 procsnz[i] += rowlengths[j]; 2909 } 2910 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2911 } 2912 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2913 } else { 2914 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2915 } 2916 2917 if (!rank) { 2918 /* determine max buffer needed and allocate it */ 2919 maxnz = 0; 2920 for (i=0; i<size; i++) { 2921 maxnz = PetscMax(maxnz,procsnz[i]); 2922 } 2923 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2924 2925 /* read in my part of the matrix column indices */ 2926 nz = procsnz[0]; 2927 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2928 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2929 2930 /* read in every one elses and ship off */ 2931 for (i=1; i<size; i++) { 2932 nz = procsnz[i]; 2933 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2934 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2935 } 2936 ierr = PetscFree(cols);CHKERRQ(ierr); 2937 } else { 2938 /* determine buffer space needed for message */ 2939 nz = 0; 2940 for (i=0; i<m; i++) { 2941 nz += ourlens[i]; 2942 } 2943 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2944 2945 /* receive message of column indices*/ 2946 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2947 } 2948 2949 /* determine column ownership if matrix is not square */ 2950 if (N != M) { 2951 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2952 else n = newMat->cmap->n; 2953 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2954 cstart = cend - n; 2955 } else { 2956 cstart = rstart; 2957 cend = rend; 2958 n = cend - cstart; 2959 } 2960 2961 /* loop over local rows, determining number of off diagonal entries */ 2962 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2963 jj = 0; 2964 for (i=0; i<m; i++) { 2965 for (j=0; j<ourlens[i]; j++) { 2966 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2967 jj++; 2968 } 2969 } 2970 2971 for (i=0; i<m; i++) { 2972 ourlens[i] -= offlens[i]; 2973 } 2974 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2975 2976 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2977 2978 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2979 2980 for (i=0; i<m; i++) { 2981 ourlens[i] += offlens[i]; 2982 } 2983 2984 if (!rank) { 2985 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2986 2987 /* read in my part of the matrix numerical values */ 2988 nz = procsnz[0]; 2989 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2990 2991 /* insert into matrix */ 2992 jj = rstart; 2993 smycols = mycols; 2994 svals = vals; 2995 for (i=0; i<m; i++) { 2996 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2997 smycols += ourlens[i]; 2998 svals += ourlens[i]; 2999 jj++; 3000 } 3001 3002 /* read in other processors and ship out */ 3003 for (i=1; i<size; i++) { 3004 nz = procsnz[i]; 3005 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3006 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3007 } 3008 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3009 } else { 3010 /* receive numeric values */ 3011 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3012 3013 /* receive message of values*/ 3014 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3015 3016 /* insert into matrix */ 3017 jj = rstart; 3018 smycols = mycols; 3019 svals = vals; 3020 for (i=0; i<m; i++) { 3021 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3022 smycols += ourlens[i]; 3023 svals += ourlens[i]; 3024 jj++; 3025 } 3026 } 3027 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3028 ierr = PetscFree(vals);CHKERRQ(ierr); 3029 ierr = PetscFree(mycols);CHKERRQ(ierr); 3030 ierr = PetscFree(rowners);CHKERRQ(ierr); 3031 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3032 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3033 PetscFunctionReturn(0); 3034 } 3035 3036 #undef __FUNCT__ 3037 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3038 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */ 3039 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3040 { 3041 PetscErrorCode ierr; 3042 IS iscol_local; 3043 PetscInt csize; 3044 3045 PetscFunctionBegin; 3046 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3047 if (call == MAT_REUSE_MATRIX) { 3048 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3049 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3050 } else { 3051 /* check if we are grabbing all columns*/ 3052 PetscBool isstride; 3053 PetscMPIInt lisstride = 0,gisstride; 3054 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3055 if (isstride) { 3056 PetscInt start,len,mstart,mlen; 3057 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3058 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3059 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3060 if (mstart == start && mlen-mstart == len) lisstride = 1; 3061 } 3062 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3063 if (gisstride) { 3064 PetscInt N; 3065 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3066 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3067 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3068 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3069 } else { 3070 PetscInt cbs; 3071 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3072 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3073 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3074 } 3075 } 3076 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3077 if (call == MAT_INITIAL_MATRIX) { 3078 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3079 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3080 } 3081 PetscFunctionReturn(0); 3082 } 3083 3084 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3085 #undef __FUNCT__ 3086 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3087 /* 3088 Not great since it makes two copies of the submatrix, first an SeqAIJ 3089 in local and then by concatenating the local matrices the end result. 3090 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3091 3092 Note: This requires a sequential iscol with all indices. 3093 */ 3094 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3095 { 3096 PetscErrorCode ierr; 3097 PetscMPIInt rank,size; 3098 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3099 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3100 PetscBool allcolumns, colflag; 3101 Mat M,Mreuse; 3102 MatScalar *vwork,*aa; 3103 MPI_Comm comm; 3104 Mat_SeqAIJ *aij; 3105 3106 PetscFunctionBegin; 3107 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3108 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3109 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3110 3111 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3112 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3113 if (colflag && ncol == mat->cmap->N) { 3114 allcolumns = PETSC_TRUE; 3115 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3116 } else { 3117 allcolumns = PETSC_FALSE; 3118 } 3119 if (call == MAT_REUSE_MATRIX) { 3120 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3121 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3122 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3123 } else { 3124 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3125 } 3126 3127 /* 3128 m - number of local rows 3129 n - number of columns (same on all processors) 3130 rstart - first row in new global matrix generated 3131 */ 3132 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3133 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3134 if (call == MAT_INITIAL_MATRIX) { 3135 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3136 ii = aij->i; 3137 jj = aij->j; 3138 3139 /* 3140 Determine the number of non-zeros in the diagonal and off-diagonal 3141 portions of the matrix in order to do correct preallocation 3142 */ 3143 3144 /* first get start and end of "diagonal" columns */ 3145 if (csize == PETSC_DECIDE) { 3146 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3147 if (mglobal == n) { /* square matrix */ 3148 nlocal = m; 3149 } else { 3150 nlocal = n/size + ((n % size) > rank); 3151 } 3152 } else { 3153 nlocal = csize; 3154 } 3155 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3156 rstart = rend - nlocal; 3157 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3158 3159 /* next, compute all the lengths */ 3160 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3161 olens = dlens + m; 3162 for (i=0; i<m; i++) { 3163 jend = ii[i+1] - ii[i]; 3164 olen = 0; 3165 dlen = 0; 3166 for (j=0; j<jend; j++) { 3167 if (*jj < rstart || *jj >= rend) olen++; 3168 else dlen++; 3169 jj++; 3170 } 3171 olens[i] = olen; 3172 dlens[i] = dlen; 3173 } 3174 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3175 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3176 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3177 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3178 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3179 ierr = PetscFree(dlens);CHKERRQ(ierr); 3180 } else { 3181 PetscInt ml,nl; 3182 3183 M = *newmat; 3184 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3185 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3186 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3187 /* 3188 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3189 rather than the slower MatSetValues(). 3190 */ 3191 M->was_assembled = PETSC_TRUE; 3192 M->assembled = PETSC_FALSE; 3193 } 3194 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3195 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3196 ii = aij->i; 3197 jj = aij->j; 3198 aa = aij->a; 3199 for (i=0; i<m; i++) { 3200 row = rstart + i; 3201 nz = ii[i+1] - ii[i]; 3202 cwork = jj; jj += nz; 3203 vwork = aa; aa += nz; 3204 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3205 } 3206 3207 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3208 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3209 *newmat = M; 3210 3211 /* save submatrix used in processor for next request */ 3212 if (call == MAT_INITIAL_MATRIX) { 3213 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3214 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3215 } 3216 PetscFunctionReturn(0); 3217 } 3218 3219 #undef __FUNCT__ 3220 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3221 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3222 { 3223 PetscInt m,cstart, cend,j,nnz,i,d; 3224 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3225 const PetscInt *JJ; 3226 PetscScalar *values; 3227 PetscErrorCode ierr; 3228 3229 PetscFunctionBegin; 3230 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3231 3232 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3233 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3234 m = B->rmap->n; 3235 cstart = B->cmap->rstart; 3236 cend = B->cmap->rend; 3237 rstart = B->rmap->rstart; 3238 3239 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3240 3241 #if defined(PETSC_USE_DEBUGGING) 3242 for (i=0; i<m; i++) { 3243 nnz = Ii[i+1]- Ii[i]; 3244 JJ = J + Ii[i]; 3245 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3246 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3247 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3248 } 3249 #endif 3250 3251 for (i=0; i<m; i++) { 3252 nnz = Ii[i+1]- Ii[i]; 3253 JJ = J + Ii[i]; 3254 nnz_max = PetscMax(nnz_max,nnz); 3255 d = 0; 3256 for (j=0; j<nnz; j++) { 3257 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3258 } 3259 d_nnz[i] = d; 3260 o_nnz[i] = nnz - d; 3261 } 3262 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3263 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3264 3265 if (v) values = (PetscScalar*)v; 3266 else { 3267 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3268 } 3269 3270 for (i=0; i<m; i++) { 3271 ii = i + rstart; 3272 nnz = Ii[i+1]- Ii[i]; 3273 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3274 } 3275 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3276 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3277 3278 if (!v) { 3279 ierr = PetscFree(values);CHKERRQ(ierr); 3280 } 3281 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3282 PetscFunctionReturn(0); 3283 } 3284 3285 #undef __FUNCT__ 3286 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3287 /*@ 3288 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3289 (the default parallel PETSc format). 3290 3291 Collective on MPI_Comm 3292 3293 Input Parameters: 3294 + B - the matrix 3295 . i - the indices into j for the start of each local row (starts with zero) 3296 . j - the column indices for each local row (starts with zero) 3297 - v - optional values in the matrix 3298 3299 Level: developer 3300 3301 Notes: 3302 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3303 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3304 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3305 3306 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3307 3308 The format which is used for the sparse matrix input, is equivalent to a 3309 row-major ordering.. i.e for the following matrix, the input data expected is 3310 as shown 3311 3312 $ 1 0 0 3313 $ 2 0 3 P0 3314 $ ------- 3315 $ 4 5 6 P1 3316 $ 3317 $ Process0 [P0]: rows_owned=[0,1] 3318 $ i = {0,1,3} [size = nrow+1 = 2+1] 3319 $ j = {0,0,2} [size = 3] 3320 $ v = {1,2,3} [size = 3] 3321 $ 3322 $ Process1 [P1]: rows_owned=[2] 3323 $ i = {0,3} [size = nrow+1 = 1+1] 3324 $ j = {0,1,2} [size = 3] 3325 $ v = {4,5,6} [size = 3] 3326 3327 .keywords: matrix, aij, compressed row, sparse, parallel 3328 3329 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3330 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3331 @*/ 3332 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3333 { 3334 PetscErrorCode ierr; 3335 3336 PetscFunctionBegin; 3337 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3338 PetscFunctionReturn(0); 3339 } 3340 3341 #undef __FUNCT__ 3342 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3343 /*@C 3344 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3345 (the default parallel PETSc format). For good matrix assembly performance 3346 the user should preallocate the matrix storage by setting the parameters 3347 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3348 performance can be increased by more than a factor of 50. 3349 3350 Collective on MPI_Comm 3351 3352 Input Parameters: 3353 + B - the matrix 3354 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3355 (same value is used for all local rows) 3356 . d_nnz - array containing the number of nonzeros in the various rows of the 3357 DIAGONAL portion of the local submatrix (possibly different for each row) 3358 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3359 The size of this array is equal to the number of local rows, i.e 'm'. 3360 For matrices that will be factored, you must leave room for (and set) 3361 the diagonal entry even if it is zero. 3362 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3363 submatrix (same value is used for all local rows). 3364 - o_nnz - array containing the number of nonzeros in the various rows of the 3365 OFF-DIAGONAL portion of the local submatrix (possibly different for 3366 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3367 structure. The size of this array is equal to the number 3368 of local rows, i.e 'm'. 3369 3370 If the *_nnz parameter is given then the *_nz parameter is ignored 3371 3372 The AIJ format (also called the Yale sparse matrix format or 3373 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3374 storage. The stored row and column indices begin with zero. 3375 See Users-Manual: ch_mat for details. 3376 3377 The parallel matrix is partitioned such that the first m0 rows belong to 3378 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3379 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3380 3381 The DIAGONAL portion of the local submatrix of a processor can be defined 3382 as the submatrix which is obtained by extraction the part corresponding to 3383 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3384 first row that belongs to the processor, r2 is the last row belonging to 3385 the this processor, and c1-c2 is range of indices of the local part of a 3386 vector suitable for applying the matrix to. This is an mxn matrix. In the 3387 common case of a square matrix, the row and column ranges are the same and 3388 the DIAGONAL part is also square. The remaining portion of the local 3389 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3390 3391 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3392 3393 You can call MatGetInfo() to get information on how effective the preallocation was; 3394 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3395 You can also run with the option -info and look for messages with the string 3396 malloc in them to see if additional memory allocation was needed. 3397 3398 Example usage: 3399 3400 Consider the following 8x8 matrix with 34 non-zero values, that is 3401 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3402 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3403 as follows: 3404 3405 .vb 3406 1 2 0 | 0 3 0 | 0 4 3407 Proc0 0 5 6 | 7 0 0 | 8 0 3408 9 0 10 | 11 0 0 | 12 0 3409 ------------------------------------- 3410 13 0 14 | 15 16 17 | 0 0 3411 Proc1 0 18 0 | 19 20 21 | 0 0 3412 0 0 0 | 22 23 0 | 24 0 3413 ------------------------------------- 3414 Proc2 25 26 27 | 0 0 28 | 29 0 3415 30 0 0 | 31 32 33 | 0 34 3416 .ve 3417 3418 This can be represented as a collection of submatrices as: 3419 3420 .vb 3421 A B C 3422 D E F 3423 G H I 3424 .ve 3425 3426 Where the submatrices A,B,C are owned by proc0, D,E,F are 3427 owned by proc1, G,H,I are owned by proc2. 3428 3429 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3430 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3431 The 'M','N' parameters are 8,8, and have the same values on all procs. 3432 3433 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3434 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3435 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3436 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3437 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3438 matrix, ans [DF] as another SeqAIJ matrix. 3439 3440 When d_nz, o_nz parameters are specified, d_nz storage elements are 3441 allocated for every row of the local diagonal submatrix, and o_nz 3442 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3443 One way to choose d_nz and o_nz is to use the max nonzerors per local 3444 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3445 In this case, the values of d_nz,o_nz are: 3446 .vb 3447 proc0 : dnz = 2, o_nz = 2 3448 proc1 : dnz = 3, o_nz = 2 3449 proc2 : dnz = 1, o_nz = 4 3450 .ve 3451 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3452 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3453 for proc3. i.e we are using 12+15+10=37 storage locations to store 3454 34 values. 3455 3456 When d_nnz, o_nnz parameters are specified, the storage is specified 3457 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3458 In the above case the values for d_nnz,o_nnz are: 3459 .vb 3460 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3461 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3462 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3463 .ve 3464 Here the space allocated is sum of all the above values i.e 34, and 3465 hence pre-allocation is perfect. 3466 3467 Level: intermediate 3468 3469 .keywords: matrix, aij, compressed row, sparse, parallel 3470 3471 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3472 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3473 @*/ 3474 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3475 { 3476 PetscErrorCode ierr; 3477 3478 PetscFunctionBegin; 3479 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3480 PetscValidType(B,1); 3481 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3482 PetscFunctionReturn(0); 3483 } 3484 3485 #undef __FUNCT__ 3486 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3487 /*@ 3488 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3489 CSR format the local rows. 3490 3491 Collective on MPI_Comm 3492 3493 Input Parameters: 3494 + comm - MPI communicator 3495 . m - number of local rows (Cannot be PETSC_DECIDE) 3496 . n - This value should be the same as the local size used in creating the 3497 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3498 calculated if N is given) For square matrices n is almost always m. 3499 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3500 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3501 . i - row indices 3502 . j - column indices 3503 - a - matrix values 3504 3505 Output Parameter: 3506 . mat - the matrix 3507 3508 Level: intermediate 3509 3510 Notes: 3511 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3512 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3513 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3514 3515 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3516 3517 The format which is used for the sparse matrix input, is equivalent to a 3518 row-major ordering.. i.e for the following matrix, the input data expected is 3519 as shown 3520 3521 $ 1 0 0 3522 $ 2 0 3 P0 3523 $ ------- 3524 $ 4 5 6 P1 3525 $ 3526 $ Process0 [P0]: rows_owned=[0,1] 3527 $ i = {0,1,3} [size = nrow+1 = 2+1] 3528 $ j = {0,0,2} [size = 3] 3529 $ v = {1,2,3} [size = 3] 3530 $ 3531 $ Process1 [P1]: rows_owned=[2] 3532 $ i = {0,3} [size = nrow+1 = 1+1] 3533 $ j = {0,1,2} [size = 3] 3534 $ v = {4,5,6} [size = 3] 3535 3536 .keywords: matrix, aij, compressed row, sparse, parallel 3537 3538 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3539 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3540 @*/ 3541 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3542 { 3543 PetscErrorCode ierr; 3544 3545 PetscFunctionBegin; 3546 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3547 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3548 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3549 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3550 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3551 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3552 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3553 PetscFunctionReturn(0); 3554 } 3555 3556 #undef __FUNCT__ 3557 #define __FUNCT__ "MatCreateAIJ" 3558 /*@C 3559 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3560 (the default parallel PETSc format). For good matrix assembly performance 3561 the user should preallocate the matrix storage by setting the parameters 3562 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3563 performance can be increased by more than a factor of 50. 3564 3565 Collective on MPI_Comm 3566 3567 Input Parameters: 3568 + comm - MPI communicator 3569 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3570 This value should be the same as the local size used in creating the 3571 y vector for the matrix-vector product y = Ax. 3572 . n - This value should be the same as the local size used in creating the 3573 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3574 calculated if N is given) For square matrices n is almost always m. 3575 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3576 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3577 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3578 (same value is used for all local rows) 3579 . d_nnz - array containing the number of nonzeros in the various rows of the 3580 DIAGONAL portion of the local submatrix (possibly different for each row) 3581 or NULL, if d_nz is used to specify the nonzero structure. 3582 The size of this array is equal to the number of local rows, i.e 'm'. 3583 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3584 submatrix (same value is used for all local rows). 3585 - o_nnz - array containing the number of nonzeros in the various rows of the 3586 OFF-DIAGONAL portion of the local submatrix (possibly different for 3587 each row) or NULL, if o_nz is used to specify the nonzero 3588 structure. The size of this array is equal to the number 3589 of local rows, i.e 'm'. 3590 3591 Output Parameter: 3592 . A - the matrix 3593 3594 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3595 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3596 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3597 3598 Notes: 3599 If the *_nnz parameter is given then the *_nz parameter is ignored 3600 3601 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3602 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3603 storage requirements for this matrix. 3604 3605 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3606 processor than it must be used on all processors that share the object for 3607 that argument. 3608 3609 The user MUST specify either the local or global matrix dimensions 3610 (possibly both). 3611 3612 The parallel matrix is partitioned across processors such that the 3613 first m0 rows belong to process 0, the next m1 rows belong to 3614 process 1, the next m2 rows belong to process 2 etc.. where 3615 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3616 values corresponding to [m x N] submatrix. 3617 3618 The columns are logically partitioned with the n0 columns belonging 3619 to 0th partition, the next n1 columns belonging to the next 3620 partition etc.. where n0,n1,n2... are the input parameter 'n'. 3621 3622 The DIAGONAL portion of the local submatrix on any given processor 3623 is the submatrix corresponding to the rows and columns m,n 3624 corresponding to the given processor. i.e diagonal matrix on 3625 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3626 etc. The remaining portion of the local submatrix [m x (N-n)] 3627 constitute the OFF-DIAGONAL portion. The example below better 3628 illustrates this concept. 3629 3630 For a square global matrix we define each processor's diagonal portion 3631 to be its local rows and the corresponding columns (a square submatrix); 3632 each processor's off-diagonal portion encompasses the remainder of the 3633 local matrix (a rectangular submatrix). 3634 3635 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3636 3637 When calling this routine with a single process communicator, a matrix of 3638 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3639 type of communicator, use the construction mechanism: 3640 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3641 3642 By default, this format uses inodes (identical nodes) when possible. 3643 We search for consecutive rows with the same nonzero structure, thereby 3644 reusing matrix information to achieve increased efficiency. 3645 3646 Options Database Keys: 3647 + -mat_no_inode - Do not use inodes 3648 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3649 - -mat_aij_oneindex - Internally use indexing starting at 1 3650 rather than 0. Note that when calling MatSetValues(), 3651 the user still MUST index entries starting at 0! 3652 3653 3654 Example usage: 3655 3656 Consider the following 8x8 matrix with 34 non-zero values, that is 3657 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3658 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3659 as follows: 3660 3661 .vb 3662 1 2 0 | 0 3 0 | 0 4 3663 Proc0 0 5 6 | 7 0 0 | 8 0 3664 9 0 10 | 11 0 0 | 12 0 3665 ------------------------------------- 3666 13 0 14 | 15 16 17 | 0 0 3667 Proc1 0 18 0 | 19 20 21 | 0 0 3668 0 0 0 | 22 23 0 | 24 0 3669 ------------------------------------- 3670 Proc2 25 26 27 | 0 0 28 | 29 0 3671 30 0 0 | 31 32 33 | 0 34 3672 .ve 3673 3674 This can be represented as a collection of submatrices as: 3675 3676 .vb 3677 A B C 3678 D E F 3679 G H I 3680 .ve 3681 3682 Where the submatrices A,B,C are owned by proc0, D,E,F are 3683 owned by proc1, G,H,I are owned by proc2. 3684 3685 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3686 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3687 The 'M','N' parameters are 8,8, and have the same values on all procs. 3688 3689 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3690 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3691 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3692 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3693 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3694 matrix, ans [DF] as another SeqAIJ matrix. 3695 3696 When d_nz, o_nz parameters are specified, d_nz storage elements are 3697 allocated for every row of the local diagonal submatrix, and o_nz 3698 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3699 One way to choose d_nz and o_nz is to use the max nonzerors per local 3700 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3701 In this case, the values of d_nz,o_nz are: 3702 .vb 3703 proc0 : dnz = 2, o_nz = 2 3704 proc1 : dnz = 3, o_nz = 2 3705 proc2 : dnz = 1, o_nz = 4 3706 .ve 3707 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3708 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3709 for proc3. i.e we are using 12+15+10=37 storage locations to store 3710 34 values. 3711 3712 When d_nnz, o_nnz parameters are specified, the storage is specified 3713 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3714 In the above case the values for d_nnz,o_nnz are: 3715 .vb 3716 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3717 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3718 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3719 .ve 3720 Here the space allocated is sum of all the above values i.e 34, and 3721 hence pre-allocation is perfect. 3722 3723 Level: intermediate 3724 3725 .keywords: matrix, aij, compressed row, sparse, parallel 3726 3727 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3728 MPIAIJ, MatCreateMPIAIJWithArrays() 3729 @*/ 3730 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3731 { 3732 PetscErrorCode ierr; 3733 PetscMPIInt size; 3734 3735 PetscFunctionBegin; 3736 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3737 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3738 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3739 if (size > 1) { 3740 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3741 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3742 } else { 3743 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3744 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3745 } 3746 PetscFunctionReturn(0); 3747 } 3748 3749 #undef __FUNCT__ 3750 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3751 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3752 { 3753 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3754 PetscBool flg; 3755 PetscErrorCode ierr; 3756 3757 PetscFunctionBegin; 3758 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 3759 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MPIAIJ matrix as input"); 3760 if (Ad) *Ad = a->A; 3761 if (Ao) *Ao = a->B; 3762 if (colmap) *colmap = a->garray; 3763 PetscFunctionReturn(0); 3764 } 3765 3766 #undef __FUNCT__ 3767 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3768 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3769 { 3770 PetscErrorCode ierr; 3771 PetscInt i; 3772 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3773 3774 PetscFunctionBegin; 3775 if (coloring->ctype == IS_COLORING_GLOBAL) { 3776 ISColoringValue *allcolors,*colors; 3777 ISColoring ocoloring; 3778 3779 /* set coloring for diagonal portion */ 3780 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3781 3782 /* set coloring for off-diagonal portion */ 3783 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 3784 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3785 for (i=0; i<a->B->cmap->n; i++) { 3786 colors[i] = allcolors[a->garray[i]]; 3787 } 3788 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3789 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3790 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3791 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3792 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3793 ISColoringValue *colors; 3794 PetscInt *larray; 3795 ISColoring ocoloring; 3796 3797 /* set coloring for diagonal portion */ 3798 ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr); 3799 for (i=0; i<a->A->cmap->n; i++) { 3800 larray[i] = i + A->cmap->rstart; 3801 } 3802 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 3803 ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr); 3804 for (i=0; i<a->A->cmap->n; i++) { 3805 colors[i] = coloring->colors[larray[i]]; 3806 } 3807 ierr = PetscFree(larray);CHKERRQ(ierr); 3808 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3809 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3810 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3811 3812 /* set coloring for off-diagonal portion */ 3813 ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr); 3814 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 3815 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3816 for (i=0; i<a->B->cmap->n; i++) { 3817 colors[i] = coloring->colors[larray[i]]; 3818 } 3819 ierr = PetscFree(larray);CHKERRQ(ierr); 3820 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3821 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3822 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3823 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3824 PetscFunctionReturn(0); 3825 } 3826 3827 #undef __FUNCT__ 3828 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3829 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3830 { 3831 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3832 PetscErrorCode ierr; 3833 3834 PetscFunctionBegin; 3835 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3836 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3837 PetscFunctionReturn(0); 3838 } 3839 3840 #undef __FUNCT__ 3841 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3842 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3843 { 3844 PetscErrorCode ierr; 3845 PetscInt m,N,i,rstart,nnz,Ii; 3846 PetscInt *indx; 3847 PetscScalar *values; 3848 3849 PetscFunctionBegin; 3850 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3851 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3852 PetscInt *dnz,*onz,sum,bs,cbs; 3853 3854 if (n == PETSC_DECIDE) { 3855 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3856 } 3857 /* Check sum(n) = N */ 3858 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3859 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3860 3861 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3862 rstart -= m; 3863 3864 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3865 for (i=0; i<m; i++) { 3866 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3867 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3868 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3869 } 3870 3871 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3872 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3873 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3874 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3875 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3876 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3877 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3878 } 3879 3880 /* numeric phase */ 3881 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3882 for (i=0; i<m; i++) { 3883 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3884 Ii = i + rstart; 3885 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3886 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3887 } 3888 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3889 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3890 PetscFunctionReturn(0); 3891 } 3892 3893 #undef __FUNCT__ 3894 #define __FUNCT__ "MatFileSplit" 3895 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3896 { 3897 PetscErrorCode ierr; 3898 PetscMPIInt rank; 3899 PetscInt m,N,i,rstart,nnz; 3900 size_t len; 3901 const PetscInt *indx; 3902 PetscViewer out; 3903 char *name; 3904 Mat B; 3905 const PetscScalar *values; 3906 3907 PetscFunctionBegin; 3908 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3909 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3910 /* Should this be the type of the diagonal block of A? */ 3911 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3912 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3913 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3914 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3915 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3916 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3917 for (i=0; i<m; i++) { 3918 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3919 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3920 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3921 } 3922 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3923 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3924 3925 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3926 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3927 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3928 sprintf(name,"%s.%d",outfile,rank); 3929 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3930 ierr = PetscFree(name);CHKERRQ(ierr); 3931 ierr = MatView(B,out);CHKERRQ(ierr); 3932 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3933 ierr = MatDestroy(&B);CHKERRQ(ierr); 3934 PetscFunctionReturn(0); 3935 } 3936 3937 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3938 #undef __FUNCT__ 3939 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3940 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3941 { 3942 PetscErrorCode ierr; 3943 Mat_Merge_SeqsToMPI *merge; 3944 PetscContainer container; 3945 3946 PetscFunctionBegin; 3947 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3948 if (container) { 3949 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3950 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3951 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3952 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3953 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3954 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3955 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3956 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3957 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3958 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3959 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3960 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3961 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3962 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3963 ierr = PetscFree(merge);CHKERRQ(ierr); 3964 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3965 } 3966 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3967 PetscFunctionReturn(0); 3968 } 3969 3970 #include <../src/mat/utils/freespace.h> 3971 #include <petscbt.h> 3972 3973 #undef __FUNCT__ 3974 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3975 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3976 { 3977 PetscErrorCode ierr; 3978 MPI_Comm comm; 3979 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3980 PetscMPIInt size,rank,taga,*len_s; 3981 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3982 PetscInt proc,m; 3983 PetscInt **buf_ri,**buf_rj; 3984 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3985 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3986 MPI_Request *s_waits,*r_waits; 3987 MPI_Status *status; 3988 MatScalar *aa=a->a; 3989 MatScalar **abuf_r,*ba_i; 3990 Mat_Merge_SeqsToMPI *merge; 3991 PetscContainer container; 3992 3993 PetscFunctionBegin; 3994 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3995 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3996 3997 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3998 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3999 4000 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4001 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4002 4003 bi = merge->bi; 4004 bj = merge->bj; 4005 buf_ri = merge->buf_ri; 4006 buf_rj = merge->buf_rj; 4007 4008 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4009 owners = merge->rowmap->range; 4010 len_s = merge->len_s; 4011 4012 /* send and recv matrix values */ 4013 /*-----------------------------*/ 4014 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4015 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4016 4017 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4018 for (proc=0,k=0; proc<size; proc++) { 4019 if (!len_s[proc]) continue; 4020 i = owners[proc]; 4021 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4022 k++; 4023 } 4024 4025 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4026 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4027 ierr = PetscFree(status);CHKERRQ(ierr); 4028 4029 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4030 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4031 4032 /* insert mat values of mpimat */ 4033 /*----------------------------*/ 4034 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4035 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4036 4037 for (k=0; k<merge->nrecv; k++) { 4038 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4039 nrows = *(buf_ri_k[k]); 4040 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4041 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4042 } 4043 4044 /* set values of ba */ 4045 m = merge->rowmap->n; 4046 for (i=0; i<m; i++) { 4047 arow = owners[rank] + i; 4048 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4049 bnzi = bi[i+1] - bi[i]; 4050 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4051 4052 /* add local non-zero vals of this proc's seqmat into ba */ 4053 anzi = ai[arow+1] - ai[arow]; 4054 aj = a->j + ai[arow]; 4055 aa = a->a + ai[arow]; 4056 nextaj = 0; 4057 for (j=0; nextaj<anzi; j++) { 4058 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4059 ba_i[j] += aa[nextaj++]; 4060 } 4061 } 4062 4063 /* add received vals into ba */ 4064 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4065 /* i-th row */ 4066 if (i == *nextrow[k]) { 4067 anzi = *(nextai[k]+1) - *nextai[k]; 4068 aj = buf_rj[k] + *(nextai[k]); 4069 aa = abuf_r[k] + *(nextai[k]); 4070 nextaj = 0; 4071 for (j=0; nextaj<anzi; j++) { 4072 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4073 ba_i[j] += aa[nextaj++]; 4074 } 4075 } 4076 nextrow[k]++; nextai[k]++; 4077 } 4078 } 4079 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4080 } 4081 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4082 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4083 4084 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4085 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4086 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4087 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4088 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4089 PetscFunctionReturn(0); 4090 } 4091 4092 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4093 4094 #undef __FUNCT__ 4095 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4096 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4097 { 4098 PetscErrorCode ierr; 4099 Mat B_mpi; 4100 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4101 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4102 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4103 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4104 PetscInt len,proc,*dnz,*onz,bs,cbs; 4105 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4106 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4107 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4108 MPI_Status *status; 4109 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4110 PetscBT lnkbt; 4111 Mat_Merge_SeqsToMPI *merge; 4112 PetscContainer container; 4113 4114 PetscFunctionBegin; 4115 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4116 4117 /* make sure it is a PETSc comm */ 4118 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4119 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4120 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4121 4122 ierr = PetscNew(&merge);CHKERRQ(ierr); 4123 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4124 4125 /* determine row ownership */ 4126 /*---------------------------------------------------------*/ 4127 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4128 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4129 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4130 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4131 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4132 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4133 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4134 4135 m = merge->rowmap->n; 4136 owners = merge->rowmap->range; 4137 4138 /* determine the number of messages to send, their lengths */ 4139 /*---------------------------------------------------------*/ 4140 len_s = merge->len_s; 4141 4142 len = 0; /* length of buf_si[] */ 4143 merge->nsend = 0; 4144 for (proc=0; proc<size; proc++) { 4145 len_si[proc] = 0; 4146 if (proc == rank) { 4147 len_s[proc] = 0; 4148 } else { 4149 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4150 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4151 } 4152 if (len_s[proc]) { 4153 merge->nsend++; 4154 nrows = 0; 4155 for (i=owners[proc]; i<owners[proc+1]; i++) { 4156 if (ai[i+1] > ai[i]) nrows++; 4157 } 4158 len_si[proc] = 2*(nrows+1); 4159 len += len_si[proc]; 4160 } 4161 } 4162 4163 /* determine the number and length of messages to receive for ij-structure */ 4164 /*-------------------------------------------------------------------------*/ 4165 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4166 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4167 4168 /* post the Irecv of j-structure */ 4169 /*-------------------------------*/ 4170 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4171 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4172 4173 /* post the Isend of j-structure */ 4174 /*--------------------------------*/ 4175 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4176 4177 for (proc=0, k=0; proc<size; proc++) { 4178 if (!len_s[proc]) continue; 4179 i = owners[proc]; 4180 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4181 k++; 4182 } 4183 4184 /* receives and sends of j-structure are complete */ 4185 /*------------------------------------------------*/ 4186 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4187 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4188 4189 /* send and recv i-structure */ 4190 /*---------------------------*/ 4191 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4192 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4193 4194 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4195 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4196 for (proc=0,k=0; proc<size; proc++) { 4197 if (!len_s[proc]) continue; 4198 /* form outgoing message for i-structure: 4199 buf_si[0]: nrows to be sent 4200 [1:nrows]: row index (global) 4201 [nrows+1:2*nrows+1]: i-structure index 4202 */ 4203 /*-------------------------------------------*/ 4204 nrows = len_si[proc]/2 - 1; 4205 buf_si_i = buf_si + nrows+1; 4206 buf_si[0] = nrows; 4207 buf_si_i[0] = 0; 4208 nrows = 0; 4209 for (i=owners[proc]; i<owners[proc+1]; i++) { 4210 anzi = ai[i+1] - ai[i]; 4211 if (anzi) { 4212 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4213 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4214 nrows++; 4215 } 4216 } 4217 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4218 k++; 4219 buf_si += len_si[proc]; 4220 } 4221 4222 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4223 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4224 4225 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4226 for (i=0; i<merge->nrecv; i++) { 4227 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4228 } 4229 4230 ierr = PetscFree(len_si);CHKERRQ(ierr); 4231 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4232 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4233 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4234 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4235 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4236 ierr = PetscFree(status);CHKERRQ(ierr); 4237 4238 /* compute a local seq matrix in each processor */ 4239 /*----------------------------------------------*/ 4240 /* allocate bi array and free space for accumulating nonzero column info */ 4241 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4242 bi[0] = 0; 4243 4244 /* create and initialize a linked list */ 4245 nlnk = N+1; 4246 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4247 4248 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4249 len = ai[owners[rank+1]] - ai[owners[rank]]; 4250 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4251 4252 current_space = free_space; 4253 4254 /* determine symbolic info for each local row */ 4255 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4256 4257 for (k=0; k<merge->nrecv; k++) { 4258 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4259 nrows = *buf_ri_k[k]; 4260 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4261 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4262 } 4263 4264 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4265 len = 0; 4266 for (i=0; i<m; i++) { 4267 bnzi = 0; 4268 /* add local non-zero cols of this proc's seqmat into lnk */ 4269 arow = owners[rank] + i; 4270 anzi = ai[arow+1] - ai[arow]; 4271 aj = a->j + ai[arow]; 4272 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4273 bnzi += nlnk; 4274 /* add received col data into lnk */ 4275 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4276 if (i == *nextrow[k]) { /* i-th row */ 4277 anzi = *(nextai[k]+1) - *nextai[k]; 4278 aj = buf_rj[k] + *nextai[k]; 4279 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4280 bnzi += nlnk; 4281 nextrow[k]++; nextai[k]++; 4282 } 4283 } 4284 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4285 4286 /* if free space is not available, make more free space */ 4287 if (current_space->local_remaining<bnzi) { 4288 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4289 nspacedouble++; 4290 } 4291 /* copy data into free space, then initialize lnk */ 4292 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4293 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4294 4295 current_space->array += bnzi; 4296 current_space->local_used += bnzi; 4297 current_space->local_remaining -= bnzi; 4298 4299 bi[i+1] = bi[i] + bnzi; 4300 } 4301 4302 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4303 4304 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4305 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4306 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4307 4308 /* create symbolic parallel matrix B_mpi */ 4309 /*---------------------------------------*/ 4310 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4311 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4312 if (n==PETSC_DECIDE) { 4313 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4314 } else { 4315 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4316 } 4317 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4318 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4319 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4320 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4321 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4322 4323 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4324 B_mpi->assembled = PETSC_FALSE; 4325 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4326 merge->bi = bi; 4327 merge->bj = bj; 4328 merge->buf_ri = buf_ri; 4329 merge->buf_rj = buf_rj; 4330 merge->coi = NULL; 4331 merge->coj = NULL; 4332 merge->owners_co = NULL; 4333 4334 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4335 4336 /* attach the supporting struct to B_mpi for reuse */ 4337 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4338 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4339 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4340 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4341 *mpimat = B_mpi; 4342 4343 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4344 PetscFunctionReturn(0); 4345 } 4346 4347 #undef __FUNCT__ 4348 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4349 /*@C 4350 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4351 matrices from each processor 4352 4353 Collective on MPI_Comm 4354 4355 Input Parameters: 4356 + comm - the communicators the parallel matrix will live on 4357 . seqmat - the input sequential matrices 4358 . m - number of local rows (or PETSC_DECIDE) 4359 . n - number of local columns (or PETSC_DECIDE) 4360 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4361 4362 Output Parameter: 4363 . mpimat - the parallel matrix generated 4364 4365 Level: advanced 4366 4367 Notes: 4368 The dimensions of the sequential matrix in each processor MUST be the same. 4369 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4370 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4371 @*/ 4372 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4373 { 4374 PetscErrorCode ierr; 4375 PetscMPIInt size; 4376 4377 PetscFunctionBegin; 4378 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4379 if (size == 1) { 4380 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4381 if (scall == MAT_INITIAL_MATRIX) { 4382 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4383 } else { 4384 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4385 } 4386 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4387 PetscFunctionReturn(0); 4388 } 4389 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4390 if (scall == MAT_INITIAL_MATRIX) { 4391 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4392 } 4393 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4394 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4395 PetscFunctionReturn(0); 4396 } 4397 4398 #undef __FUNCT__ 4399 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4400 /*@ 4401 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4402 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4403 with MatGetSize() 4404 4405 Not Collective 4406 4407 Input Parameters: 4408 + A - the matrix 4409 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4410 4411 Output Parameter: 4412 . A_loc - the local sequential matrix generated 4413 4414 Level: developer 4415 4416 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4417 4418 @*/ 4419 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4420 { 4421 PetscErrorCode ierr; 4422 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4423 Mat_SeqAIJ *mat,*a,*b; 4424 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4425 MatScalar *aa,*ba,*cam; 4426 PetscScalar *ca; 4427 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4428 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4429 PetscBool match; 4430 MPI_Comm comm; 4431 PetscMPIInt size; 4432 4433 PetscFunctionBegin; 4434 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4435 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4436 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4437 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4438 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4439 4440 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4441 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4442 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4443 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4444 aa = a->a; ba = b->a; 4445 if (scall == MAT_INITIAL_MATRIX) { 4446 if (size == 1) { 4447 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4448 PetscFunctionReturn(0); 4449 } 4450 4451 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4452 ci[0] = 0; 4453 for (i=0; i<am; i++) { 4454 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4455 } 4456 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4457 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4458 k = 0; 4459 for (i=0; i<am; i++) { 4460 ncols_o = bi[i+1] - bi[i]; 4461 ncols_d = ai[i+1] - ai[i]; 4462 /* off-diagonal portion of A */ 4463 for (jo=0; jo<ncols_o; jo++) { 4464 col = cmap[*bj]; 4465 if (col >= cstart) break; 4466 cj[k] = col; bj++; 4467 ca[k++] = *ba++; 4468 } 4469 /* diagonal portion of A */ 4470 for (j=0; j<ncols_d; j++) { 4471 cj[k] = cstart + *aj++; 4472 ca[k++] = *aa++; 4473 } 4474 /* off-diagonal portion of A */ 4475 for (j=jo; j<ncols_o; j++) { 4476 cj[k] = cmap[*bj++]; 4477 ca[k++] = *ba++; 4478 } 4479 } 4480 /* put together the new matrix */ 4481 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4482 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4483 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4484 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4485 mat->free_a = PETSC_TRUE; 4486 mat->free_ij = PETSC_TRUE; 4487 mat->nonew = 0; 4488 } else if (scall == MAT_REUSE_MATRIX) { 4489 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4490 ci = mat->i; cj = mat->j; cam = mat->a; 4491 for (i=0; i<am; i++) { 4492 /* off-diagonal portion of A */ 4493 ncols_o = bi[i+1] - bi[i]; 4494 for (jo=0; jo<ncols_o; jo++) { 4495 col = cmap[*bj]; 4496 if (col >= cstart) break; 4497 *cam++ = *ba++; bj++; 4498 } 4499 /* diagonal portion of A */ 4500 ncols_d = ai[i+1] - ai[i]; 4501 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4502 /* off-diagonal portion of A */ 4503 for (j=jo; j<ncols_o; j++) { 4504 *cam++ = *ba++; bj++; 4505 } 4506 } 4507 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4508 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4509 PetscFunctionReturn(0); 4510 } 4511 4512 #undef __FUNCT__ 4513 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4514 /*@C 4515 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4516 4517 Not Collective 4518 4519 Input Parameters: 4520 + A - the matrix 4521 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4522 - row, col - index sets of rows and columns to extract (or NULL) 4523 4524 Output Parameter: 4525 . A_loc - the local sequential matrix generated 4526 4527 Level: developer 4528 4529 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4530 4531 @*/ 4532 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4533 { 4534 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4535 PetscErrorCode ierr; 4536 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4537 IS isrowa,iscola; 4538 Mat *aloc; 4539 PetscBool match; 4540 4541 PetscFunctionBegin; 4542 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4543 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4544 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4545 if (!row) { 4546 start = A->rmap->rstart; end = A->rmap->rend; 4547 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4548 } else { 4549 isrowa = *row; 4550 } 4551 if (!col) { 4552 start = A->cmap->rstart; 4553 cmap = a->garray; 4554 nzA = a->A->cmap->n; 4555 nzB = a->B->cmap->n; 4556 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4557 ncols = 0; 4558 for (i=0; i<nzB; i++) { 4559 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4560 else break; 4561 } 4562 imark = i; 4563 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4564 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4565 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4566 } else { 4567 iscola = *col; 4568 } 4569 if (scall != MAT_INITIAL_MATRIX) { 4570 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4571 aloc[0] = *A_loc; 4572 } 4573 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4574 *A_loc = aloc[0]; 4575 ierr = PetscFree(aloc);CHKERRQ(ierr); 4576 if (!row) { 4577 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4578 } 4579 if (!col) { 4580 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4581 } 4582 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4583 PetscFunctionReturn(0); 4584 } 4585 4586 #undef __FUNCT__ 4587 #define __FUNCT__ "MatGetBrowsOfAcols" 4588 /*@C 4589 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4590 4591 Collective on Mat 4592 4593 Input Parameters: 4594 + A,B - the matrices in mpiaij format 4595 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4596 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4597 4598 Output Parameter: 4599 + rowb, colb - index sets of rows and columns of B to extract 4600 - B_seq - the sequential matrix generated 4601 4602 Level: developer 4603 4604 @*/ 4605 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4606 { 4607 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4608 PetscErrorCode ierr; 4609 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4610 IS isrowb,iscolb; 4611 Mat *bseq=NULL; 4612 4613 PetscFunctionBegin; 4614 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4615 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4616 } 4617 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4618 4619 if (scall == MAT_INITIAL_MATRIX) { 4620 start = A->cmap->rstart; 4621 cmap = a->garray; 4622 nzA = a->A->cmap->n; 4623 nzB = a->B->cmap->n; 4624 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4625 ncols = 0; 4626 for (i=0; i<nzB; i++) { /* row < local row index */ 4627 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4628 else break; 4629 } 4630 imark = i; 4631 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4632 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4633 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4634 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4635 } else { 4636 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4637 isrowb = *rowb; iscolb = *colb; 4638 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 4639 bseq[0] = *B_seq; 4640 } 4641 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4642 *B_seq = bseq[0]; 4643 ierr = PetscFree(bseq);CHKERRQ(ierr); 4644 if (!rowb) { 4645 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4646 } else { 4647 *rowb = isrowb; 4648 } 4649 if (!colb) { 4650 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4651 } else { 4652 *colb = iscolb; 4653 } 4654 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4655 PetscFunctionReturn(0); 4656 } 4657 4658 #undef __FUNCT__ 4659 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4660 /* 4661 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4662 of the OFF-DIAGONAL portion of local A 4663 4664 Collective on Mat 4665 4666 Input Parameters: 4667 + A,B - the matrices in mpiaij format 4668 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4669 4670 Output Parameter: 4671 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4672 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4673 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4674 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4675 4676 Level: developer 4677 4678 */ 4679 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4680 { 4681 VecScatter_MPI_General *gen_to,*gen_from; 4682 PetscErrorCode ierr; 4683 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4684 Mat_SeqAIJ *b_oth; 4685 VecScatter ctx =a->Mvctx; 4686 MPI_Comm comm; 4687 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4688 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4689 PetscScalar *rvalues,*svalues; 4690 MatScalar *b_otha,*bufa,*bufA; 4691 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4692 MPI_Request *rwaits = NULL,*swaits = NULL; 4693 MPI_Status *sstatus,rstatus; 4694 PetscMPIInt jj,size; 4695 PetscInt *cols,sbs,rbs; 4696 PetscScalar *vals; 4697 4698 PetscFunctionBegin; 4699 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4700 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4701 4702 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4703 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4704 } 4705 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4706 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4707 4708 gen_to = (VecScatter_MPI_General*)ctx->todata; 4709 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4710 rvalues = gen_from->values; /* holds the length of receiving row */ 4711 svalues = gen_to->values; /* holds the length of sending row */ 4712 nrecvs = gen_from->n; 4713 nsends = gen_to->n; 4714 4715 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4716 srow = gen_to->indices; /* local row index to be sent */ 4717 sstarts = gen_to->starts; 4718 sprocs = gen_to->procs; 4719 sstatus = gen_to->sstatus; 4720 sbs = gen_to->bs; 4721 rstarts = gen_from->starts; 4722 rprocs = gen_from->procs; 4723 rbs = gen_from->bs; 4724 4725 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4726 if (scall == MAT_INITIAL_MATRIX) { 4727 /* i-array */ 4728 /*---------*/ 4729 /* post receives */ 4730 for (i=0; i<nrecvs; i++) { 4731 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4732 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4733 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4734 } 4735 4736 /* pack the outgoing message */ 4737 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4738 4739 sstartsj[0] = 0; 4740 rstartsj[0] = 0; 4741 len = 0; /* total length of j or a array to be sent */ 4742 k = 0; 4743 for (i=0; i<nsends; i++) { 4744 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4745 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4746 for (j=0; j<nrows; j++) { 4747 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4748 for (l=0; l<sbs; l++) { 4749 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4750 4751 rowlen[j*sbs+l] = ncols; 4752 4753 len += ncols; 4754 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4755 } 4756 k++; 4757 } 4758 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4759 4760 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4761 } 4762 /* recvs and sends of i-array are completed */ 4763 i = nrecvs; 4764 while (i--) { 4765 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4766 } 4767 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4768 4769 /* allocate buffers for sending j and a arrays */ 4770 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 4771 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 4772 4773 /* create i-array of B_oth */ 4774 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 4775 4776 b_othi[0] = 0; 4777 len = 0; /* total length of j or a array to be received */ 4778 k = 0; 4779 for (i=0; i<nrecvs; i++) { 4780 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4781 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4782 for (j=0; j<nrows; j++) { 4783 b_othi[k+1] = b_othi[k] + rowlen[j]; 4784 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 4785 k++; 4786 } 4787 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4788 } 4789 4790 /* allocate space for j and a arrrays of B_oth */ 4791 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 4792 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 4793 4794 /* j-array */ 4795 /*---------*/ 4796 /* post receives of j-array */ 4797 for (i=0; i<nrecvs; i++) { 4798 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4799 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4800 } 4801 4802 /* pack the outgoing message j-array */ 4803 k = 0; 4804 for (i=0; i<nsends; i++) { 4805 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4806 bufJ = bufj+sstartsj[i]; 4807 for (j=0; j<nrows; j++) { 4808 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4809 for (ll=0; ll<sbs; ll++) { 4810 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4811 for (l=0; l<ncols; l++) { 4812 *bufJ++ = cols[l]; 4813 } 4814 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4815 } 4816 } 4817 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4818 } 4819 4820 /* recvs and sends of j-array are completed */ 4821 i = nrecvs; 4822 while (i--) { 4823 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4824 } 4825 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4826 } else if (scall == MAT_REUSE_MATRIX) { 4827 sstartsj = *startsj_s; 4828 rstartsj = *startsj_r; 4829 bufa = *bufa_ptr; 4830 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4831 b_otha = b_oth->a; 4832 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4833 4834 /* a-array */ 4835 /*---------*/ 4836 /* post receives of a-array */ 4837 for (i=0; i<nrecvs; i++) { 4838 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4839 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4840 } 4841 4842 /* pack the outgoing message a-array */ 4843 k = 0; 4844 for (i=0; i<nsends; i++) { 4845 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4846 bufA = bufa+sstartsj[i]; 4847 for (j=0; j<nrows; j++) { 4848 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4849 for (ll=0; ll<sbs; ll++) { 4850 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4851 for (l=0; l<ncols; l++) { 4852 *bufA++ = vals[l]; 4853 } 4854 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4855 } 4856 } 4857 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4858 } 4859 /* recvs and sends of a-array are completed */ 4860 i = nrecvs; 4861 while (i--) { 4862 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4863 } 4864 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4865 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4866 4867 if (scall == MAT_INITIAL_MATRIX) { 4868 /* put together the new matrix */ 4869 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4870 4871 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4872 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4873 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4874 b_oth->free_a = PETSC_TRUE; 4875 b_oth->free_ij = PETSC_TRUE; 4876 b_oth->nonew = 0; 4877 4878 ierr = PetscFree(bufj);CHKERRQ(ierr); 4879 if (!startsj_s || !bufa_ptr) { 4880 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4881 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4882 } else { 4883 *startsj_s = sstartsj; 4884 *startsj_r = rstartsj; 4885 *bufa_ptr = bufa; 4886 } 4887 } 4888 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4889 PetscFunctionReturn(0); 4890 } 4891 4892 #undef __FUNCT__ 4893 #define __FUNCT__ "MatGetCommunicationStructs" 4894 /*@C 4895 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4896 4897 Not Collective 4898 4899 Input Parameters: 4900 . A - The matrix in mpiaij format 4901 4902 Output Parameter: 4903 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4904 . colmap - A map from global column index to local index into lvec 4905 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4906 4907 Level: developer 4908 4909 @*/ 4910 #if defined(PETSC_USE_CTABLE) 4911 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4912 #else 4913 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4914 #endif 4915 { 4916 Mat_MPIAIJ *a; 4917 4918 PetscFunctionBegin; 4919 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4920 PetscValidPointer(lvec, 2); 4921 PetscValidPointer(colmap, 3); 4922 PetscValidPointer(multScatter, 4); 4923 a = (Mat_MPIAIJ*) A->data; 4924 if (lvec) *lvec = a->lvec; 4925 if (colmap) *colmap = a->colmap; 4926 if (multScatter) *multScatter = a->Mvctx; 4927 PetscFunctionReturn(0); 4928 } 4929 4930 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4932 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4933 #if defined(PETSC_HAVE_ELEMENTAL) 4934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4935 #endif 4936 4937 #undef __FUNCT__ 4938 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4939 /* 4940 Computes (B'*A')' since computing B*A directly is untenable 4941 4942 n p p 4943 ( ) ( ) ( ) 4944 m ( A ) * n ( B ) = m ( C ) 4945 ( ) ( ) ( ) 4946 4947 */ 4948 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4949 { 4950 PetscErrorCode ierr; 4951 Mat At,Bt,Ct; 4952 4953 PetscFunctionBegin; 4954 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4955 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4956 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4957 ierr = MatDestroy(&At);CHKERRQ(ierr); 4958 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4959 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4960 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4961 PetscFunctionReturn(0); 4962 } 4963 4964 #undef __FUNCT__ 4965 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4966 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4967 { 4968 PetscErrorCode ierr; 4969 PetscInt m=A->rmap->n,n=B->cmap->n; 4970 Mat Cmat; 4971 4972 PetscFunctionBegin; 4973 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4974 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 4975 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4976 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 4977 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4978 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 4979 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4980 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4981 4982 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 4983 4984 *C = Cmat; 4985 PetscFunctionReturn(0); 4986 } 4987 4988 /* ----------------------------------------------------------------*/ 4989 #undef __FUNCT__ 4990 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4991 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4992 { 4993 PetscErrorCode ierr; 4994 4995 PetscFunctionBegin; 4996 if (scall == MAT_INITIAL_MATRIX) { 4997 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4998 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4999 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5000 } 5001 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5002 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5003 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5004 PetscFunctionReturn(0); 5005 } 5006 5007 /*MC 5008 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5009 5010 Options Database Keys: 5011 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5012 5013 Level: beginner 5014 5015 .seealso: MatCreateAIJ() 5016 M*/ 5017 5018 #undef __FUNCT__ 5019 #define __FUNCT__ "MatCreate_MPIAIJ" 5020 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5021 { 5022 Mat_MPIAIJ *b; 5023 PetscErrorCode ierr; 5024 PetscMPIInt size; 5025 5026 PetscFunctionBegin; 5027 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5028 5029 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5030 B->data = (void*)b; 5031 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5032 B->assembled = PETSC_FALSE; 5033 B->insertmode = NOT_SET_VALUES; 5034 b->size = size; 5035 5036 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5037 5038 /* build cache for off array entries formed */ 5039 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5040 5041 b->donotstash = PETSC_FALSE; 5042 b->colmap = 0; 5043 b->garray = 0; 5044 b->roworiented = PETSC_TRUE; 5045 5046 /* stuff used for matrix vector multiply */ 5047 b->lvec = NULL; 5048 b->Mvctx = NULL; 5049 5050 /* stuff for MatGetRow() */ 5051 b->rowindices = 0; 5052 b->rowvalues = 0; 5053 b->getrowactive = PETSC_FALSE; 5054 5055 /* flexible pointer used in CUSP/CUSPARSE classes */ 5056 b->spptr = NULL; 5057 5058 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5059 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5060 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5061 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5062 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5063 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5064 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5065 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5066 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5067 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5068 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5069 #if defined(PETSC_HAVE_ELEMENTAL) 5070 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5071 #endif 5072 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5073 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5074 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5075 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5076 PetscFunctionReturn(0); 5077 } 5078 5079 #undef __FUNCT__ 5080 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5081 /*@C 5082 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5083 and "off-diagonal" part of the matrix in CSR format. 5084 5085 Collective on MPI_Comm 5086 5087 Input Parameters: 5088 + comm - MPI communicator 5089 . m - number of local rows (Cannot be PETSC_DECIDE) 5090 . n - This value should be the same as the local size used in creating the 5091 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5092 calculated if N is given) For square matrices n is almost always m. 5093 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5094 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5095 . i - row indices for "diagonal" portion of matrix 5096 . j - column indices 5097 . a - matrix values 5098 . oi - row indices for "off-diagonal" portion of matrix 5099 . oj - column indices 5100 - oa - matrix values 5101 5102 Output Parameter: 5103 . mat - the matrix 5104 5105 Level: advanced 5106 5107 Notes: 5108 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5109 must free the arrays once the matrix has been destroyed and not before. 5110 5111 The i and j indices are 0 based 5112 5113 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5114 5115 This sets local rows and cannot be used to set off-processor values. 5116 5117 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5118 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5119 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5120 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5121 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5122 communication if it is known that only local entries will be set. 5123 5124 .keywords: matrix, aij, compressed row, sparse, parallel 5125 5126 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5127 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5128 @*/ 5129 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5130 { 5131 PetscErrorCode ierr; 5132 Mat_MPIAIJ *maij; 5133 5134 PetscFunctionBegin; 5135 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5136 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5137 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5138 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5139 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5140 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5141 maij = (Mat_MPIAIJ*) (*mat)->data; 5142 5143 (*mat)->preallocated = PETSC_TRUE; 5144 5145 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5146 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5147 5148 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5149 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5150 5151 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5152 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5153 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5154 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5155 5156 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5157 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5158 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5159 PetscFunctionReturn(0); 5160 } 5161 5162 /* 5163 Special version for direct calls from Fortran 5164 */ 5165 #include <petsc/private/fortranimpl.h> 5166 5167 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5168 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5169 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5170 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5171 #endif 5172 5173 /* Change these macros so can be used in void function */ 5174 #undef CHKERRQ 5175 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5176 #undef SETERRQ2 5177 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5178 #undef SETERRQ3 5179 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5180 #undef SETERRQ 5181 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5182 5183 #undef __FUNCT__ 5184 #define __FUNCT__ "matsetvaluesmpiaij_" 5185 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5186 { 5187 Mat mat = *mmat; 5188 PetscInt m = *mm, n = *mn; 5189 InsertMode addv = *maddv; 5190 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5191 PetscScalar value; 5192 PetscErrorCode ierr; 5193 5194 MatCheckPreallocated(mat,1); 5195 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5196 5197 #if defined(PETSC_USE_DEBUG) 5198 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5199 #endif 5200 { 5201 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5202 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5203 PetscBool roworiented = aij->roworiented; 5204 5205 /* Some Variables required in the macro */ 5206 Mat A = aij->A; 5207 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5208 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5209 MatScalar *aa = a->a; 5210 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5211 Mat B = aij->B; 5212 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5213 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5214 MatScalar *ba = b->a; 5215 5216 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5217 PetscInt nonew = a->nonew; 5218 MatScalar *ap1,*ap2; 5219 5220 PetscFunctionBegin; 5221 for (i=0; i<m; i++) { 5222 if (im[i] < 0) continue; 5223 #if defined(PETSC_USE_DEBUG) 5224 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5225 #endif 5226 if (im[i] >= rstart && im[i] < rend) { 5227 row = im[i] - rstart; 5228 lastcol1 = -1; 5229 rp1 = aj + ai[row]; 5230 ap1 = aa + ai[row]; 5231 rmax1 = aimax[row]; 5232 nrow1 = ailen[row]; 5233 low1 = 0; 5234 high1 = nrow1; 5235 lastcol2 = -1; 5236 rp2 = bj + bi[row]; 5237 ap2 = ba + bi[row]; 5238 rmax2 = bimax[row]; 5239 nrow2 = bilen[row]; 5240 low2 = 0; 5241 high2 = nrow2; 5242 5243 for (j=0; j<n; j++) { 5244 if (roworiented) value = v[i*n+j]; 5245 else value = v[i+j*m]; 5246 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5247 if (in[j] >= cstart && in[j] < cend) { 5248 col = in[j] - cstart; 5249 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5250 } else if (in[j] < 0) continue; 5251 #if defined(PETSC_USE_DEBUG) 5252 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5253 #endif 5254 else { 5255 if (mat->was_assembled) { 5256 if (!aij->colmap) { 5257 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5258 } 5259 #if defined(PETSC_USE_CTABLE) 5260 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5261 col--; 5262 #else 5263 col = aij->colmap[in[j]] - 1; 5264 #endif 5265 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5266 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5267 col = in[j]; 5268 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5269 B = aij->B; 5270 b = (Mat_SeqAIJ*)B->data; 5271 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5272 rp2 = bj + bi[row]; 5273 ap2 = ba + bi[row]; 5274 rmax2 = bimax[row]; 5275 nrow2 = bilen[row]; 5276 low2 = 0; 5277 high2 = nrow2; 5278 bm = aij->B->rmap->n; 5279 ba = b->a; 5280 } 5281 } else col = in[j]; 5282 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5283 } 5284 } 5285 } else if (!aij->donotstash) { 5286 if (roworiented) { 5287 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5288 } else { 5289 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5290 } 5291 } 5292 } 5293 } 5294 PetscFunctionReturnVoid(); 5295 } 5296 5297