#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
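/*
   Illustrative usage sketch for the MATAIJ type documented above (comment only, not compiled
   as part of this file).  As recommended in the MATAIJ notes, both preallocation routines are
   called so the same code runs on one or many processes; the per-row nonzero estimates
   (5 diagonal, 2 off-diagonal) and the local size nlocal are placeholder assumptions.

     Mat      C;
     PetscInt nlocal = 100;                          (assumed local number of rows/columns)
     MatCreate(PETSC_COMM_WORLD,&C);
     MatSetSizes(C,nlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(C,MATAIJ);                           (or -mat_type aij with MatSetFromOptions())
     MatSeqAIJSetPreallocation(C,5,NULL);            (used when the communicator has one process)
     MatMPIAIJSetPreallocation(C,5,NULL,2,NULL);     (used when it has several processes)
     ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use the matrix ...
     MatDestroy(&C);
*/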
111 { 112 PetscErrorCode ierr; 113 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 114 115 PetscFunctionBegin; 116 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 117 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 118 } else { 119 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 120 } 121 PetscFunctionReturn(0); 122 } 123 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 128 { 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 130 PetscErrorCode ierr; 131 PetscInt i,rstart,nrows,*rows; 132 133 PetscFunctionBegin; 134 *zrows = NULL; 135 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 136 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 137 for (i=0; i<nrows; i++) rows[i] += rstart; 138 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 145 { 146 PetscErrorCode ierr; 147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 148 PetscInt i,n,*garray = aij->garray; 149 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 150 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 151 PetscReal *work; 152 153 PetscFunctionBegin; 154 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 155 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 156 if (type == NORM_2) { 157 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 158 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 159 } 160 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 161 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 162 } 163 } else if (type == NORM_1) { 164 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 165 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 166 } 167 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 168 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 169 } 170 } else if (type == NORM_INFINITY) { 171 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 172 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 173 } 174 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 175 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 176 } 177 178 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 179 if (type == NORM_INFINITY) { 180 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 181 } else { 182 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 183 } 184 ierr = PetscFree(work);CHKERRQ(ierr); 185 if (type == NORM_2) { 186 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 187 } 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ" 193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 194 { 195 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 196 IS sis,gis; 197 PetscErrorCode ierr; 198 const PetscInt *isis,*igis; 199 PetscInt n,*iis,nsis,ngis,rstart,i; 200 201 PetscFunctionBegin; 202 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 203 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 204 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 205 ierr = 
ISGetSize(sis,&nsis);CHKERRQ(ierr); 206 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 207 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 208 209 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 210 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 211 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 212 n = ngis + nsis; 213 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 214 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 215 for (i=0; i<n; i++) iis[i] += rstart; 216 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 217 218 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 219 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 220 ierr = ISDestroy(&sis);CHKERRQ(ierr); 221 ierr = ISDestroy(&gis);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatDistribute_MPIAIJ" 227 /* 228 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 229 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 230 231 Only for square matrices 232 233 Used by a preconditioner, hence PETSC_EXTERN 234 */ 235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 236 { 237 PetscMPIInt rank,size; 238 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 239 PetscErrorCode ierr; 240 Mat mat; 241 Mat_SeqAIJ *gmata; 242 PetscMPIInt tag; 243 MPI_Status status; 244 PetscBool aij; 245 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 246 247 PetscFunctionBegin; 248 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 249 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 250 if (!rank) { 251 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 252 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 253 } 254 if (reuse == MAT_INITIAL_MATRIX) { 255 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 256 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 257 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 258 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 259 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 260 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 261 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 262 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 263 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 264 265 rowners[0] = 0; 266 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 267 rstart = rowners[rank]; 268 rend = rowners[rank+1]; 269 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 270 if (!rank) { 271 gmata = (Mat_SeqAIJ*) gmat->data; 272 /* send row lengths to all processors */ 273 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 274 for (i=1; i<size; i++) { 275 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 276 } 277 /* determine number diagonal and off-diagonal counts */ 278 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 279 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 280 jj = 0; 281 for (i=0; i<m; i++) { 282 for (j=0; j<dlens[i]; j++) { 283 if (gmata->j[jj] < rstart) ld[i]++; 284 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 285 jj++; 286 } 287 } 288 /* send column indices to other processes */ 289 for (i=1; i<size; i++) { 290 nz = 
gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 291 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 292 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 } 294 295 /* send numerical values to other processes */ 296 for (i=1; i<size; i++) { 297 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 298 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 299 } 300 gmataa = gmata->a; 301 gmataj = gmata->j; 302 303 } else { 304 /* receive row lengths */ 305 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 306 /* receive column indices */ 307 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 308 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 309 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* determine number diagonal and off-diagonal counts */ 311 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 312 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 313 jj = 0; 314 for (i=0; i<m; i++) { 315 for (j=0; j<dlens[i]; j++) { 316 if (gmataj[jj] < rstart) ld[i]++; 317 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 318 jj++; 319 } 320 } 321 /* receive numerical values */ 322 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* set preallocation */ 326 for (i=0; i<m; i++) { 327 dlens[i] -= olens[i]; 328 } 329 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 330 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 331 332 for (i=0; i<m; i++) { 333 dlens[i] += olens[i]; 334 } 335 cnt = 0; 336 for (i=0; i<m; i++) { 337 row = rstart + i; 338 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 339 cnt += dlens[i]; 340 } 341 if (rank) { 342 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 343 } 344 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 345 ierr = PetscFree(rowners);CHKERRQ(ierr); 346 347 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 348 349 *inmat = mat; 350 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 351 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 352 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 353 mat = *inmat; 354 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 355 if (!rank) { 356 /* send numerical values to other processes */ 357 gmata = (Mat_SeqAIJ*) gmat->data; 358 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 359 gmataa = gmata->a; 360 for (i=1; i<size; i++) { 361 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 362 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 363 } 364 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 365 } else { 366 /* receive numerical values from process 0*/ 367 nz = Ad->nz + Ao->nz; 368 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 369 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 370 } 371 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 372 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 373 ad = Ad->a; 374 ao = Ao->a; 375 if (mat->rmap->n) { 376 i = 0; 377 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 378 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; 
gmataa += nz; 379 } 380 for (i=1; i<mat->rmap->n; i++) { 381 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 i--; 385 if (mat->rmap->n) { 386 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 387 } 388 if (rank) { 389 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 390 } 391 } 392 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 393 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 /* 398 Local utility routine that creates a mapping from the global column 399 number to the local number in the off-diagonal part of the local 400 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 401 a slightly higher hash table cost; without it it is not scalable (each processor 402 has an order N integer array but is fast to acess. 403 */ 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 407 { 408 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 409 PetscErrorCode ierr; 410 PetscInt n = aij->B->cmap->n,i; 411 412 PetscFunctionBegin; 413 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 414 #if defined(PETSC_USE_CTABLE) 415 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 416 for (i=0; i<n; i++) { 417 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 418 } 419 #else 420 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 421 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 422 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 423 #endif 424 PetscFunctionReturn(0); 425 } 426 427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 428 { \ 429 if (col <= lastcol1) low1 = 0; \ 430 else high1 = nrow1; \ 431 lastcol1 = col;\ 432 while (high1-low1 > 5) { \ 433 t = (low1+high1)/2; \ 434 if (rp1[t] > col) high1 = t; \ 435 else low1 = t; \ 436 } \ 437 for (_i=low1; _i<high1; _i++) { \ 438 if (rp1[_i] > col) break; \ 439 if (rp1[_i] == col) { \ 440 if (addv == ADD_VALUES) ap1[_i] += value; \ 441 else ap1[_i] = value; \ 442 goto a_noinsert; \ 443 } \ 444 } \ 445 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 446 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 447 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 448 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 449 N = nrow1++ - 1; a->nz++; high1++; \ 450 /* shift up all the later entries in this row */ \ 451 for (ii=N; ii>=_i; ii--) { \ 452 rp1[ii+1] = rp1[ii]; \ 453 ap1[ii+1] = ap1[ii]; \ 454 } \ 455 rp1[_i] = col; \ 456 ap1[_i] = value; \ 457 A->nonzerostate++;\ 458 a_noinsert: ; \ 459 ailen[row] = nrow1; \ 460 } 461 462 463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 464 { \ 465 if (col <= lastcol2) low2 = 0; \ 466 else high2 = nrow2; \ 467 lastcol2 = col; \ 468 while (high2-low2 > 5) { \ 469 t = (low2+high2)/2; \ 470 if (rp2[t] > col) high2 = t; \ 471 else low2 = t; \ 472 } \ 473 for (_i=low2; _i<high2; _i++) { \ 
474 if (rp2[_i] > col) break; \ 475 if (rp2[_i] == col) { \ 476 if (addv == ADD_VALUES) ap2[_i] += value; \ 477 else ap2[_i] = value; \ 478 goto b_noinsert; \ 479 } \ 480 } \ 481 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 482 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 484 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 485 N = nrow2++ - 1; b->nz++; high2++; \ 486 /* shift up all the later entries in this row */ \ 487 for (ii=N; ii>=_i; ii--) { \ 488 rp2[ii+1] = rp2[ii]; \ 489 ap2[ii+1] = ap2[ii]; \ 490 } \ 491 rp2[_i] = col; \ 492 ap2[_i] = value; \ 493 B->nonzerostate++; \ 494 b_noinsert: ; \ 495 bilen[row] = nrow2; \ 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 501 { 502 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 503 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 504 PetscErrorCode ierr; 505 PetscInt l,*garray = mat->garray,diag; 506 507 PetscFunctionBegin; 508 /* code only works for square matrices A */ 509 510 /* find size of row to the left of the diagonal part */ 511 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 512 row = row - diag; 513 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 514 if (garray[b->j[b->i[row]+l]] > diag) break; 515 } 516 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 517 518 /* diagonal part */ 519 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 520 521 /* right of diagonal part */ 522 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatSetValues_MPIAIJ" 528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 529 { 530 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 531 PetscScalar value; 532 PetscErrorCode ierr; 533 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 540 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 541 MatScalar *aa = a->a; 542 PetscBool ignorezeroentries = a->ignorezeroentries; 543 Mat B = aij->B; 544 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 545 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 546 MatScalar *ba = b->a; 547 548 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 549 PetscInt nonew; 550 MatScalar *ap1,*ap2; 551 552 PetscFunctionBegin; 553 for (i=0; i<m; i++) { 554 if (im[i] < 0) continue; 555 #if defined(PETSC_USE_DEBUG) 556 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 557 #endif 558 if (im[i] >= rstart && im[i] < rend) { 559 row = im[i] - rstart; 560 lastcol1 = -1; 561 rp1 = aj + ai[row]; 562 ap1 = aa + ai[row]; 563 rmax1 = aimax[row]; 564 nrow1 = ailen[row]; 565 low1 = 0; 566 
high1 = nrow1; 567 lastcol2 = -1; 568 rp2 = bj + bi[row]; 569 ap2 = ba + bi[row]; 570 rmax2 = bimax[row]; 571 nrow2 = bilen[row]; 572 low2 = 0; 573 high2 = nrow2; 574 575 for (j=0; j<n; j++) { 576 if (roworiented) value = v[i*n+j]; 577 else value = v[i+j*m]; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 579 if (in[j] >= cstart && in[j] < cend) { 580 col = in[j] - cstart; 581 nonew = a->nonew; 582 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 583 } else if (in[j] < 0) continue; 584 #if defined(PETSC_USE_DEBUG) 585 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 586 #endif 587 else { 588 if (mat->was_assembled) { 589 if (!aij->colmap) { 590 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 591 } 592 #if defined(PETSC_USE_CTABLE) 593 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 594 col--; 595 #else 596 col = aij->colmap[in[j]] - 1; 597 #endif 598 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 599 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 600 col = in[j]; 601 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 602 B = aij->B; 603 b = (Mat_SeqAIJ*)B->data; 604 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 605 rp2 = bj + bi[row]; 606 ap2 = ba + bi[row]; 607 rmax2 = bimax[row]; 608 nrow2 = bilen[row]; 609 low2 = 0; 610 high2 = nrow2; 611 bm = aij->B->rmap->n; 612 ba = b->a; 613 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } else col = in[j]; 615 nonew = b->nonew; 616 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 617 } 618 } 619 } else { 620 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 621 if (!aij->donotstash) { 622 mat->assembled = PETSC_FALSE; 623 if (roworiented) { 624 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 625 } else { 626 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 627 } 628 } 629 } 630 } 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatGetValues_MPIAIJ" 636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 641 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 642 643 PetscFunctionBegin; 644 for (i=0; i<m; i++) { 645 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 646 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 647 if (idxm[i] >= rstart && idxm[i] < rend) { 648 row = idxm[i] - rstart; 649 for (j=0; j<n; j++) { 650 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 651 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 652 if (idxn[j] >= cstart && idxn[j] < cend) 
{ 653 col = idxn[j] - cstart; 654 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 655 } else { 656 if (!aij->colmap) { 657 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 658 } 659 #if defined(PETSC_USE_CTABLE) 660 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 661 col--; 662 #else 663 col = aij->colmap[idxn[j]] - 1; 664 #endif 665 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 666 else { 667 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 668 } 669 } 670 } 671 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 672 } 673 PetscFunctionReturn(0); 674 } 675 676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 681 { 682 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 683 PetscErrorCode ierr; 684 PetscInt nstash,reallocs; 685 686 PetscFunctionBegin; 687 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 688 689 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 690 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 691 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 698 { 699 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 700 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 701 PetscErrorCode ierr; 702 PetscMPIInt n; 703 PetscInt i,j,rstart,ncols,flg; 704 PetscInt *row,*col; 705 PetscBool other_disassembled; 706 PetscScalar *val; 707 708 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 709 710 PetscFunctionBegin; 711 if (!aij->donotstash && !mat->nooffprocentries) { 712 while (1) { 713 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 714 if (!flg) break; 715 716 for (i=0; i<n; ) { 717 /* Now identify the consecutive vals belonging to the same row */ 718 for (j=i,rstart=row[j]; j<n; j++) { 719 if (row[j] != rstart) break; 720 } 721 if (j < n) ncols = j-i; 722 else ncols = n-i; 723 /* Now assemble all these values with a single function call */ 724 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 725 726 i = j; 727 } 728 } 729 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 730 } 731 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 733 734 /* determine if any processor has disassembled, if so we must 735 also disassemble ourselfs, in order that we may reassemble. 
*/ 736 /* 737 if nonzero structure of submatrix B cannot change then we know that 738 no processor disassembled thus we can skip this stuff 739 */ 740 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 741 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 742 if (mat->was_assembled && !other_disassembled) { 743 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 744 } 745 } 746 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 747 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 748 } 749 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 750 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 752 753 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 754 755 aij->rowvalues = 0; 756 757 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 758 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 759 760 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 761 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 762 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 763 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 764 } 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 771 { 772 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 773 PetscErrorCode ierr; 774 775 PetscFunctionBegin; 776 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 777 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 778 PetscFunctionReturn(0); 779 } 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatZeroRows_MPIAIJ" 783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 784 { 785 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 786 PetscInt *lrows; 787 PetscInt r, len; 788 PetscErrorCode ierr; 789 790 PetscFunctionBegin; 791 /* get locally owned rows */ 792 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 793 /* fix right hand side if needed */ 794 if (x && b) { 795 const PetscScalar *xx; 796 PetscScalar *bb; 797 798 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 799 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 800 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 801 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 802 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 803 } 804 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 805 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 806 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 807 PetscBool cong; 808 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 809 if (cong) A->congruentlayouts = 1; 810 else A->congruentlayouts = 0; 811 } 812 if ((diag != 0.0) && A->congruentlayouts) { 813 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 814 } else if (diag != 0.0) { 815 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 816 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 817 for (r = 0; r < len; ++r) { 818 const 
PetscInt row = lrows[r] + A->rmap->rstart; 819 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 820 } 821 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 822 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 823 } else { 824 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 825 } 826 ierr = PetscFree(lrows);CHKERRQ(ierr); 827 828 /* only change matrix nonzero state if pattern was allowed to be changed */ 829 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 830 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 831 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 832 } 833 PetscFunctionReturn(0); 834 } 835 836 #undef __FUNCT__ 837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 839 { 840 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 841 PetscErrorCode ierr; 842 PetscMPIInt n = A->rmap->n; 843 PetscInt i,j,r,m,p = 0,len = 0; 844 PetscInt *lrows,*owners = A->rmap->range; 845 PetscSFNode *rrows; 846 PetscSF sf; 847 const PetscScalar *xx; 848 PetscScalar *bb,*mask; 849 Vec xmask,lmask; 850 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 851 const PetscInt *aj, *ii,*ridx; 852 PetscScalar *aa; 853 854 PetscFunctionBegin; 855 /* Create SF where leaves are input rows and roots are owned rows */ 856 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 857 for (r = 0; r < n; ++r) lrows[r] = -1; 858 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 859 for (r = 0; r < N; ++r) { 860 const PetscInt idx = rows[r]; 861 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 862 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 863 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 864 } 865 rrows[r].rank = p; 866 rrows[r].index = rows[r] - owners[p]; 867 } 868 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 869 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 870 /* Collect flags for rows to be zeroed */ 871 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 872 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 873 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 874 /* Compress and put in row numbers */ 875 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 876 /* zero diagonal part of matrix */ 877 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 878 /* handle off diagonal part of matrix */ 879 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 880 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 881 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 882 for (i=0; i<len; i++) bb[lrows[i]] = 1; 883 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 884 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 887 if (x) { 888 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 891 ierr = 
VecGetArray(b,&bb);CHKERRQ(ierr); 892 } 893 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 894 /* remove zeroed rows of off diagonal matrix */ 895 ii = aij->i; 896 for (i=0; i<len; i++) { 897 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 898 } 899 /* loop over all elements of off process part of matrix zeroing removed columns*/ 900 if (aij->compressedrow.use) { 901 m = aij->compressedrow.nrows; 902 ii = aij->compressedrow.i; 903 ridx = aij->compressedrow.rindex; 904 for (i=0; i<m; i++) { 905 n = ii[i+1] - ii[i]; 906 aj = aij->j + ii[i]; 907 aa = aij->a + ii[i]; 908 909 for (j=0; j<n; j++) { 910 if (PetscAbsScalar(mask[*aj])) { 911 if (b) bb[*ridx] -= *aa*xx[*aj]; 912 *aa = 0.0; 913 } 914 aa++; 915 aj++; 916 } 917 ridx++; 918 } 919 } else { /* do not use compressed row format */ 920 m = l->B->rmap->n; 921 for (i=0; i<m; i++) { 922 n = ii[i+1] - ii[i]; 923 aj = aij->j + ii[i]; 924 aa = aij->a + ii[i]; 925 for (j=0; j<n; j++) { 926 if (PetscAbsScalar(mask[*aj])) { 927 if (b) bb[i] -= *aa*xx[*aj]; 928 *aa = 0.0; 929 } 930 aa++; 931 aj++; 932 } 933 } 934 } 935 if (x) { 936 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 937 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 938 } 939 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 940 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 941 ierr = PetscFree(lrows);CHKERRQ(ierr); 942 943 /* only change matrix nonzero state if pattern was allowed to be changed */ 944 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 945 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 946 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 947 } 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatMult_MPIAIJ" 953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 954 { 955 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 956 PetscErrorCode ierr; 957 PetscInt nt; 958 959 PetscFunctionBegin; 960 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 961 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 962 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 964 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 965 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 972 { 973 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 974 PetscErrorCode ierr; 975 976 PetscFunctionBegin; 977 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatMultAdd_MPIAIJ" 983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 984 { 985 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 986 PetscErrorCode ierr; 987 988 PetscFunctionBegin; 989 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 990 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 991 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 998 PetscErrorCode 
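/*
   Descriptive note for the transpose multiply below: yy = A^T xx is formed as (A_diag)^T applied to the
   local part of xx, plus the reverse-scattered contribution of (B_offdiag)^T xx accumulated into yy with
   ADD_VALUES.  When the VecScatter context reports merged begin/end phases, the local product must be
   computed before VecScatterBegin() because the additions into yy already occur there; otherwise the
   local product is overlapped with the communication and the additions occur in VecScatterEnd().
*/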
MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 999 { 1000 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1001 PetscErrorCode ierr; 1002 PetscBool merged; 1003 1004 PetscFunctionBegin; 1005 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1006 /* do nondiagonal part */ 1007 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1008 if (!merged) { 1009 /* send it on its way */ 1010 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1011 /* do local part */ 1012 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1013 /* receive remote parts: note this assumes the values are not actually */ 1014 /* added in yy until the next line, */ 1015 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1016 } else { 1017 /* do local part */ 1018 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1019 /* send it on its way */ 1020 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1021 /* values actually were received in the Begin() but we need to call this nop */ 1022 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1023 } 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1029 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1030 { 1031 MPI_Comm comm; 1032 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1033 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1034 IS Me,Notme; 1035 PetscErrorCode ierr; 1036 PetscInt M,N,first,last,*notme,i; 1037 PetscMPIInt size; 1038 1039 PetscFunctionBegin; 1040 /* Easy test: symmetric diagonal block */ 1041 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1042 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1043 if (!*f) PetscFunctionReturn(0); 1044 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1045 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1046 if (size == 1) PetscFunctionReturn(0); 1047 1048 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
 */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr =
PetscFree(aij->colmap);CHKERRQ(ierr); 1138 #endif 1139 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1140 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1141 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1142 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1143 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1144 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1145 1146 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1147 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1148 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1149 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1151 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1152 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1153 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1154 #if defined(PETSC_HAVE_ELEMENTAL) 1155 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1156 #endif 1157 PetscFunctionReturn(0); 1158 } 1159 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1162 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1163 { 1164 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1165 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1166 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1167 PetscErrorCode ierr; 1168 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1169 int fd; 1170 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1171 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1172 PetscScalar *column_values; 1173 PetscInt message_count,flowcontrolcount; 1174 FILE *file; 1175 1176 PetscFunctionBegin; 1177 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1178 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1179 nz = A->nz + B->nz; 1180 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1181 if (!rank) { 1182 header[0] = MAT_FILE_CLASSID; 1183 header[1] = mat->rmap->N; 1184 header[2] = mat->cmap->N; 1185 1186 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1187 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1188 /* get largest number of rows any processor has */ 1189 rlen = mat->rmap->n; 1190 range = mat->rmap->range; 1191 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1192 } else { 1193 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1194 rlen = mat->rmap->n; 1195 } 1196 1197 /* load up the local row counts */ 1198 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1199 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1200 1201 /* store the row lengths to the file */ 1202 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1203 if (!rank) { 1204 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1205 for (i=1; i<size; i++) { 1206 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1207 rlen = 
range[i+1] - range[i]; 1208 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1209 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1210 } 1211 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1212 } else { 1213 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1214 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1215 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1216 } 1217 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1218 1219 /* load up the local column indices */ 1220 nzmax = nz; /* th processor needs space a largest processor needs */ 1221 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1222 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1223 cnt = 0; 1224 for (i=0; i<mat->rmap->n; i++) { 1225 for (j=B->i[i]; j<B->i[i+1]; j++) { 1226 if ((col = garray[B->j[j]]) > cstart) break; 1227 column_indices[cnt++] = col; 1228 } 1229 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1230 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1231 } 1232 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1233 1234 /* store the column indices to the file */ 1235 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1236 if (!rank) { 1237 MPI_Status status; 1238 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1239 for (i=1; i<size; i++) { 1240 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1241 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1242 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1243 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1244 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1245 } 1246 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1247 } else { 1248 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1249 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1250 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1251 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1252 } 1253 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1254 1255 /* load up the local column values */ 1256 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1257 cnt = 0; 1258 for (i=0; i<mat->rmap->n; i++) { 1259 for (j=B->i[i]; j<B->i[i+1]; j++) { 1260 if (garray[B->j[j]] > cstart) break; 1261 column_values[cnt++] = B->a[j]; 1262 } 1263 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1264 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1265 } 1266 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1267 1268 /* store the column values to the file */ 1269 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1270 if (!rank) { 1271 MPI_Status status; 1272 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1273 for (i=1; i<size; i++) { 1274 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1275 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1276 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1277 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1278 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1279 } 1280 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1281 } else { 1282 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1283 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1284 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1285 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1286 } 1287 ierr = PetscFree(column_values);CHKERRQ(ierr); 1288 1289 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1290 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1291 PetscFunctionReturn(0); 1292 } 1293 1294 #include <petscdraw.h> 1295 #undef __FUNCT__ 1296 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1297 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1298 { 1299 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1300 PetscErrorCode ierr; 1301 PetscMPIInt rank = aij->rank,size = aij->size; 1302 PetscBool isdraw,iascii,isbinary; 1303 PetscViewer sviewer; 1304 PetscViewerFormat format; 1305 1306 PetscFunctionBegin; 1307 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1308 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1309 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1310 if (iascii) { 1311 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1312 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1313 MatInfo info; 1314 PetscBool inodes; 1315 1316 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1317 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1318 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1319 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1320 if (!inodes) { 1321 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1322 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1323 } else { 1324 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1325 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1326 } 1327 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1328 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1329 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1330 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1331 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 
1332 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1333 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1334 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1335 PetscFunctionReturn(0); 1336 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1337 PetscInt inodecount,inodelimit,*inodes; 1338 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1339 if (inodes) { 1340 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1341 } else { 1342 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1343 } 1344 PetscFunctionReturn(0); 1345 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1346 PetscFunctionReturn(0); 1347 } 1348 } else if (isbinary) { 1349 if (size == 1) { 1350 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1351 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1352 } else { 1353 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1354 } 1355 PetscFunctionReturn(0); 1356 } else if (isdraw) { 1357 PetscDraw draw; 1358 PetscBool isnull; 1359 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1360 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1361 if (isnull) PetscFunctionReturn(0); 1362 } 1363 1364 { 1365 /* assemble the entire matrix onto first processor. */ 1366 Mat A; 1367 Mat_SeqAIJ *Aloc; 1368 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1369 MatScalar *a; 1370 1371 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1372 if (!rank) { 1373 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1374 } else { 1375 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1376 } 1377 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1378 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1379 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1380 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1381 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1382 1383 /* copy over the A part */ 1384 Aloc = (Mat_SeqAIJ*)aij->A->data; 1385 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1386 row = mat->rmap->rstart; 1387 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1388 for (i=0; i<m; i++) { 1389 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1390 row++; 1391 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1392 } 1393 aj = Aloc->j; 1394 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1395 1396 /* copy over the B part */ 1397 Aloc = (Mat_SeqAIJ*)aij->B->data; 1398 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1399 row = mat->rmap->rstart; 1400 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1401 ct = cols; 1402 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1403 for (i=0; i<m; i++) { 1404 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1405 row++; 1406 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1407 } 1408 ierr = PetscFree(ct);CHKERRQ(ierr); 1409 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1410 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1411 /* 1412 Everyone has to call to draw the matrix since the graphics waits are 1413 synchronized across all processors that share the PetscDraw object 1414 */ 1415 ierr = 
PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1416 if (!rank) { 1417 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1418 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1419 } 1420 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1421 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1422 ierr = MatDestroy(&A);CHKERRQ(ierr); 1423 } 1424 PetscFunctionReturn(0); 1425 } 1426 1427 #undef __FUNCT__ 1428 #define __FUNCT__ "MatView_MPIAIJ" 1429 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1430 { 1431 PetscErrorCode ierr; 1432 PetscBool iascii,isdraw,issocket,isbinary; 1433 1434 PetscFunctionBegin; 1435 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1436 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1437 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1438 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1439 if (iascii || isdraw || isbinary || issocket) { 1440 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1441 } 1442 PetscFunctionReturn(0); 1443 } 1444 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatSOR_MPIAIJ" 1447 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1448 { 1449 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1450 PetscErrorCode ierr; 1451 Vec bb1 = 0; 1452 PetscBool hasop; 1453 1454 PetscFunctionBegin; 1455 if (flag == SOR_APPLY_UPPER) { 1456 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1457 PetscFunctionReturn(0); 1458 } 1459 1460 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1461 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1462 } 1463 1464 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1465 if (flag & SOR_ZERO_INITIAL_GUESS) { 1466 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1467 its--; 1468 } 1469 1470 while (its--) { 1471 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1472 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1473 1474 /* update rhs: bb1 = bb - B*x */ 1475 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1476 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1477 1478 /* local sweep */ 1479 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1480 } 1481 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1482 if (flag & SOR_ZERO_INITIAL_GUESS) { 1483 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1484 its--; 1485 } 1486 while (its--) { 1487 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1488 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1489 1490 /* update rhs: bb1 = bb - B*x */ 1491 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1492 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1493 1494 /* local sweep */ 1495 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1496 } 1497 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1498 if (flag & SOR_ZERO_INITIAL_GUESS) { 1499 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1500 its--; 1501 } 1502 while (its--) { 1503 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1504 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1505 1506 /* update rhs: bb1 = bb - B*x */ 1507 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1508 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1509 1510 /* local sweep */ 1511 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1512 } 1513 } else if (flag & SOR_EISENSTAT) { 1514 Vec xx1; 1515 1516 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1517 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1518 1519 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1520 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1521 if (!mat->diag) { 1522 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1523 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1524 } 1525 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1526 if (hasop) { 1527 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1528 } else { 1529 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1530 } 1531 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1532 1533 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1534 1535 /* local sweep */ 1536 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1537 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1538 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1539 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1540 1541 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1542 1543 matin->factorerrortype = mat->A->factorerrortype; 1544 PetscFunctionReturn(0); 1545 } 1546 1547 #undef __FUNCT__ 1548 #define __FUNCT__ "MatPermute_MPIAIJ" 1549 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1550 { 1551 Mat aA,aB,Aperm; 1552 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1553 PetscScalar *aa,*ba; 1554 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1555 PetscSF rowsf,sf; 1556 IS parcolp = NULL; 1557 PetscBool done; 1558 PetscErrorCode ierr; 1559 1560 PetscFunctionBegin; 1561 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1562 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1563 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1564 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1565 1566 /* Invert row permutation to find out where my rows should go */ 1567 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1568 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1569 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1570 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1571 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1572 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1573 1574 /* Invert column permutation to find out where my columns should go */ 1575 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1576 ierr = 
PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1577 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1578 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1579 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1580 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1581 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1582 1583 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1584 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1585 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1586 1587 /* Find out where my gcols should go */ 1588 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1589 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1590 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1591 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1592 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1593 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1594 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1595 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1596 1597 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1598 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1599 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1600 for (i=0; i<m; i++) { 1601 PetscInt row = rdest[i],rowner; 1602 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1603 for (j=ai[i]; j<ai[i+1]; j++) { 1604 PetscInt cowner,col = cdest[aj[j]]; 1605 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1606 if (rowner == cowner) dnnz[i]++; 1607 else onnz[i]++; 1608 } 1609 for (j=bi[i]; j<bi[i+1]; j++) { 1610 PetscInt cowner,col = gcdest[bj[j]]; 1611 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1612 if (rowner == cowner) dnnz[i]++; 1613 else onnz[i]++; 1614 } 1615 } 1616 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1617 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1618 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1619 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1620 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1621 1622 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1623 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1624 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1625 for (i=0; i<m; i++) { 1626 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1627 PetscInt j0,rowlen; 1628 rowlen = ai[i+1] - ai[i]; 1629 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1630 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1631 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1632 } 1633 rowlen = bi[i+1] - bi[i]; 1634 for (j0=j=0; j<rowlen; j0=j) { 1635 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1636 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1637 } 1638 } 1639 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1640 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1641 ierr = 
MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1642 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1643 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1644 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1645 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1646 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1647 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1648 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1649 *B = Aperm; 1650 PetscFunctionReturn(0); 1651 } 1652 1653 #undef __FUNCT__ 1654 #define __FUNCT__ "MatGetGhosts_MPIAIJ" 1655 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1656 { 1657 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1658 PetscErrorCode ierr; 1659 1660 PetscFunctionBegin; 1661 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1662 if (ghosts) *ghosts = aij->garray; 1663 PetscFunctionReturn(0); 1664 } 1665 1666 #undef __FUNCT__ 1667 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1668 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1669 { 1670 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1671 Mat A = mat->A,B = mat->B; 1672 PetscErrorCode ierr; 1673 PetscReal isend[5],irecv[5]; 1674 1675 PetscFunctionBegin; 1676 info->block_size = 1.0; 1677 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1678 1679 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1680 isend[3] = info->memory; isend[4] = info->mallocs; 1681 1682 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1683 1684 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1685 isend[3] += info->memory; isend[4] += info->mallocs; 1686 if (flag == MAT_LOCAL) { 1687 info->nz_used = isend[0]; 1688 info->nz_allocated = isend[1]; 1689 info->nz_unneeded = isend[2]; 1690 info->memory = isend[3]; 1691 info->mallocs = isend[4]; 1692 } else if (flag == MAT_GLOBAL_MAX) { 1693 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1694 1695 info->nz_used = irecv[0]; 1696 info->nz_allocated = irecv[1]; 1697 info->nz_unneeded = irecv[2]; 1698 info->memory = irecv[3]; 1699 info->mallocs = irecv[4]; 1700 } else if (flag == MAT_GLOBAL_SUM) { 1701 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1702 1703 info->nz_used = irecv[0]; 1704 info->nz_allocated = irecv[1]; 1705 info->nz_unneeded = irecv[2]; 1706 info->memory = irecv[3]; 1707 info->mallocs = irecv[4]; 1708 } 1709 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1710 info->fill_ratio_needed = 0; 1711 info->factor_mallocs = 0; 1712 PetscFunctionReturn(0); 1713 } 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "MatSetOption_MPIAIJ" 1717 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1718 { 1719 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1720 PetscErrorCode ierr; 1721 1722 PetscFunctionBegin; 1723 switch (op) { 1724 case MAT_NEW_NONZERO_LOCATIONS: 1725 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1726 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1727 case MAT_KEEP_NONZERO_PATTERN: 1728 case MAT_NEW_NONZERO_LOCATION_ERR: 1729 case MAT_USE_INODES: 1730 case MAT_IGNORE_ZERO_ENTRIES: 1731 MatCheckPreallocated(A,1); 1732 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1733 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1734 break; 1735 case MAT_ROW_ORIENTED: 1736 MatCheckPreallocated(A,1); 1737 a->roworiented = flg; 1738 1739 ierr = 
MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1740 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1741 break; 1742 case MAT_NEW_DIAGONALS: 1743 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1744 break; 1745 case MAT_IGNORE_OFF_PROC_ENTRIES: 1746 a->donotstash = flg; 1747 break; 1748 case MAT_SPD: 1749 A->spd_set = PETSC_TRUE; 1750 A->spd = flg; 1751 if (flg) { 1752 A->symmetric = PETSC_TRUE; 1753 A->structurally_symmetric = PETSC_TRUE; 1754 A->symmetric_set = PETSC_TRUE; 1755 A->structurally_symmetric_set = PETSC_TRUE; 1756 } 1757 break; 1758 case MAT_SYMMETRIC: 1759 MatCheckPreallocated(A,1); 1760 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1761 break; 1762 case MAT_STRUCTURALLY_SYMMETRIC: 1763 MatCheckPreallocated(A,1); 1764 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1765 break; 1766 case MAT_HERMITIAN: 1767 MatCheckPreallocated(A,1); 1768 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1769 break; 1770 case MAT_SYMMETRY_ETERNAL: 1771 MatCheckPreallocated(A,1); 1772 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1773 break; 1774 default: 1775 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1776 } 1777 PetscFunctionReturn(0); 1778 } 1779 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "MatGetRow_MPIAIJ" 1782 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1783 { 1784 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1785 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1786 PetscErrorCode ierr; 1787 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1788 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1789 PetscInt *cmap,*idx_p; 1790 1791 PetscFunctionBegin; 1792 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1793 mat->getrowactive = PETSC_TRUE; 1794 1795 if (!mat->rowvalues && (idx || v)) { 1796 /* 1797 allocate enough space to hold information from the longest row. 
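       (that is, the maximum over the local rows of the combined number of nonzeros in the
       diagonal part A and the off-diagonal part B, computed by the loop below)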
1798 */ 1799 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1800 PetscInt max = 1,tmp; 1801 for (i=0; i<matin->rmap->n; i++) { 1802 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1803 if (max < tmp) max = tmp; 1804 } 1805 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1806 } 1807 1808 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1809 lrow = row - rstart; 1810 1811 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1812 if (!v) {pvA = 0; pvB = 0;} 1813 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1814 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1815 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1816 nztot = nzA + nzB; 1817 1818 cmap = mat->garray; 1819 if (v || idx) { 1820 if (nztot) { 1821 /* Sort by increasing column numbers, assuming A and B already sorted */ 1822 PetscInt imark = -1; 1823 if (v) { 1824 *v = v_p = mat->rowvalues; 1825 for (i=0; i<nzB; i++) { 1826 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1827 else break; 1828 } 1829 imark = i; 1830 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1831 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1832 } 1833 if (idx) { 1834 *idx = idx_p = mat->rowindices; 1835 if (imark > -1) { 1836 for (i=0; i<imark; i++) { 1837 idx_p[i] = cmap[cworkB[i]]; 1838 } 1839 } else { 1840 for (i=0; i<nzB; i++) { 1841 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1842 else break; 1843 } 1844 imark = i; 1845 } 1846 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1847 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1848 } 1849 } else { 1850 if (idx) *idx = 0; 1851 if (v) *v = 0; 1852 } 1853 } 1854 *nz = nztot; 1855 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1856 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1857 PetscFunctionReturn(0); 1858 } 1859 1860 #undef __FUNCT__ 1861 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1862 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1863 { 1864 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1865 1866 PetscFunctionBegin; 1867 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1868 aij->getrowactive = PETSC_FALSE; 1869 PetscFunctionReturn(0); 1870 } 1871 1872 #undef __FUNCT__ 1873 #define __FUNCT__ "MatNorm_MPIAIJ" 1874 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1875 { 1876 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1877 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1878 PetscErrorCode ierr; 1879 PetscInt i,j,cstart = mat->cmap->rstart; 1880 PetscReal sum = 0.0; 1881 MatScalar *v; 1882 1883 PetscFunctionBegin; 1884 if (aij->size == 1) { 1885 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1886 } else { 1887 if (type == NORM_FROBENIUS) { 1888 v = amat->a; 1889 for (i=0; i<amat->nz; i++) { 1890 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1891 } 1892 v = bmat->a; 1893 for (i=0; i<bmat->nz; i++) { 1894 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1895 } 1896 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1897 *norm = PetscSqrtReal(*norm); 1898 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1899 } else if (type == NORM_1) { /* max column norm */ 1900 PetscReal *tmp,*tmp2; 1901 PetscInt *jj,*garray = aij->garray; 1902 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1903 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1904 *norm = 0.0; 1905 v = amat->a; jj = amat->j; 1906 for (j=0; j<amat->nz; j++) { 1907 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1908 } 1909 v = bmat->a; jj = bmat->j; 1910 for (j=0; j<bmat->nz; j++) { 1911 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1912 } 1913 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1914 for (j=0; j<mat->cmap->N; j++) { 1915 if (tmp2[j] > *norm) *norm = tmp2[j]; 1916 } 1917 ierr = PetscFree(tmp);CHKERRQ(ierr); 1918 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1919 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1920 } else if (type == NORM_INFINITY) { /* max row norm */ 1921 PetscReal ntemp = 0.0; 1922 for (j=0; j<aij->A->rmap->n; j++) { 1923 v = amat->a + amat->i[j]; 1924 sum = 0.0; 1925 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1926 sum += PetscAbsScalar(*v); v++; 1927 } 1928 v = bmat->a + bmat->i[j]; 1929 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1930 sum += PetscAbsScalar(*v); v++; 1931 } 1932 if (sum > ntemp) ntemp = sum; 1933 } 1934 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1935 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1936 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1937 } 1938 PetscFunctionReturn(0); 1939 } 1940 1941 #undef __FUNCT__ 1942 #define __FUNCT__ "MatTranspose_MPIAIJ" 1943 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1944 { 1945 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1946 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1947 PetscErrorCode ierr; 1948 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1949 PetscInt cstart = A->cmap->rstart,ncol; 1950 Mat B; 1951 MatScalar *array; 1952 1953 PetscFunctionBegin; 1954 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1955 1956 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1957 ai = Aloc->i; aj = Aloc->j; 1958 bi = Bloc->i; bj = Bloc->j; 1959 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1960 PetscInt *d_nnz,*g_nnz,*o_nnz; 1961 PetscSFNode *oloc; 1962 PETSC_UNUSED PetscSF sf; 1963 1964 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1965 /* compute d_nnz for preallocation */ 1966 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1967 for (i=0; i<ai[ma]; i++) { 1968 d_nnz[aj[i]]++; 1969 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1970 } 1971 /* compute local off-diagonal contributions */ 1972 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1973 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1974 /* map those to global */ 1975 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1976 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1977 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1978 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1979 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1980 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1981 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1982 1983 ierr = 
MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1984 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1985 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1986 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1987 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1988 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1989 } else { 1990 B = *matout; 1991 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1992 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1993 } 1994 1995 /* copy over the A part */ 1996 array = Aloc->a; 1997 row = A->rmap->rstart; 1998 for (i=0; i<ma; i++) { 1999 ncol = ai[i+1]-ai[i]; 2000 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2001 row++; 2002 array += ncol; aj += ncol; 2003 } 2004 aj = Aloc->j; 2005 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2006 2007 /* copy over the B part */ 2008 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2009 array = Bloc->a; 2010 row = A->rmap->rstart; 2011 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2012 cols_tmp = cols; 2013 for (i=0; i<mb; i++) { 2014 ncol = bi[i+1]-bi[i]; 2015 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2016 row++; 2017 array += ncol; cols_tmp += ncol; 2018 } 2019 ierr = PetscFree(cols);CHKERRQ(ierr); 2020 2021 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2022 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2023 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2024 *matout = B; 2025 } else { 2026 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2027 } 2028 PetscFunctionReturn(0); 2029 } 2030 2031 #undef __FUNCT__ 2032 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2033 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2034 { 2035 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2036 Mat a = aij->A,b = aij->B; 2037 PetscErrorCode ierr; 2038 PetscInt s1,s2,s3; 2039 2040 PetscFunctionBegin; 2041 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2042 if (rr) { 2043 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2044 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2045 /* Overlap communication with computation. 
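         The forward scatter of rr into lvec is only started here; it is completed further
         below, after the diagonal block has been scaled, so the communication is hidden
         behind that local computation.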
*/ 2046 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2047 } 2048 if (ll) { 2049 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2050 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2051 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2052 } 2053 /* scale the diagonal block */ 2054 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2055 2056 if (rr) { 2057 /* Do a scatter end and then right scale the off-diagonal block */ 2058 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2059 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2060 } 2061 PetscFunctionReturn(0); 2062 } 2063 2064 #undef __FUNCT__ 2065 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2066 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2067 { 2068 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2069 PetscErrorCode ierr; 2070 2071 PetscFunctionBegin; 2072 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatEqual_MPIAIJ" 2078 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2079 { 2080 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2081 Mat a,b,c,d; 2082 PetscBool flg; 2083 PetscErrorCode ierr; 2084 2085 PetscFunctionBegin; 2086 a = matA->A; b = matA->B; 2087 c = matB->A; d = matB->B; 2088 2089 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2090 if (flg) { 2091 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2092 } 2093 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2094 PetscFunctionReturn(0); 2095 } 2096 2097 #undef __FUNCT__ 2098 #define __FUNCT__ "MatCopy_MPIAIJ" 2099 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2100 { 2101 PetscErrorCode ierr; 2102 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2103 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2104 2105 PetscFunctionBegin; 2106 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2107 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2108 /* because of the column compression in the off-processor part of the matrix a->B, 2109 the number of columns in a->B and b->B may be different, hence we cannot call 2110 the MatCopy() directly on the two parts. If need be, we can provide a more 2111 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2112 then copying the submatrices */ 2113 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2114 } else { 2115 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2116 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2117 } 2118 PetscFunctionReturn(0); 2119 } 2120 2121 #undef __FUNCT__ 2122 #define __FUNCT__ "MatSetUp_MPIAIJ" 2123 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2124 { 2125 PetscErrorCode ierr; 2126 2127 PetscFunctionBegin; 2128 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2129 PetscFunctionReturn(0); 2130 } 2131 2132 /* 2133 Computes the number of nonzeros per row needed for preallocation when X and Y 2134 have different nonzero structure. 
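    For each row the sorted global column lists of X and Y are merged with a two-pointer
    sweep and every distinct column is counted once; for example (illustrative), a row with
    columns {0,3,7} in X and {3,5} in Y yields nnz = 4, corresponding to the union {0,3,5,7}.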
2135 */ 2136 #undef __FUNCT__ 2137 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2138 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2139 { 2140 PetscInt i,j,k,nzx,nzy; 2141 2142 PetscFunctionBegin; 2143 /* Set the number of nonzeros in the new matrix */ 2144 for (i=0; i<m; i++) { 2145 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2146 nzx = xi[i+1] - xi[i]; 2147 nzy = yi[i+1] - yi[i]; 2148 nnz[i] = 0; 2149 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2150 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2151 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2152 nnz[i]++; 2153 } 2154 for (; k<nzy; k++) nnz[i]++; 2155 } 2156 PetscFunctionReturn(0); 2157 } 2158 2159 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2160 #undef __FUNCT__ 2161 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2162 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2163 { 2164 PetscErrorCode ierr; 2165 PetscInt m = Y->rmap->N; 2166 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2167 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2168 2169 PetscFunctionBegin; 2170 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2171 PetscFunctionReturn(0); 2172 } 2173 2174 #undef __FUNCT__ 2175 #define __FUNCT__ "MatAXPY_MPIAIJ" 2176 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2177 { 2178 PetscErrorCode ierr; 2179 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2180 PetscBLASInt bnz,one=1; 2181 Mat_SeqAIJ *x,*y; 2182 2183 PetscFunctionBegin; 2184 if (str == SAME_NONZERO_PATTERN) { 2185 PetscScalar alpha = a; 2186 x = (Mat_SeqAIJ*)xx->A->data; 2187 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2188 y = (Mat_SeqAIJ*)yy->A->data; 2189 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2190 x = (Mat_SeqAIJ*)xx->B->data; 2191 y = (Mat_SeqAIJ*)yy->B->data; 2192 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2193 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2194 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2195 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2196 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2197 } else { 2198 Mat B; 2199 PetscInt *nnz_d,*nnz_o; 2200 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2201 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2202 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2203 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2204 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2205 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2206 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2207 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2208 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2209 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2210 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2211 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2212 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2213 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2214 } 2215 
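  /* Illustrative note (not from the original source): the branch above handles
     DIFFERENT_NONZERO_PATTERN by building a fresh matrix B with the merged preallocation
     and then replacing Y's header with it; a caller reaches this path with, e.g.,

       ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

     where X and Y are assumed to be assembled MATMPIAIJ matrices of the same global size. */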
PetscFunctionReturn(0);
2216 }
2217
2218 extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2219
2220 #undef __FUNCT__
2221 #define __FUNCT__ "MatConjugate_MPIAIJ"
2222 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2223 {
2224 #if defined(PETSC_USE_COMPLEX)
2225   PetscErrorCode ierr;
2226   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2227
2228   PetscFunctionBegin;
2229   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2230   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2231 #else
2232   PetscFunctionBegin;
2233 #endif
2234   PetscFunctionReturn(0);
2235 }
2236
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatRealPart_MPIAIJ"
2239 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2240 {
2241   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2242   PetscErrorCode ierr;
2243
2244   PetscFunctionBegin;
2245   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2246   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2247   PetscFunctionReturn(0);
2248 }
2249
2250 #undef __FUNCT__
2251 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2252 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2253 {
2254   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2255   PetscErrorCode ierr;
2256
2257   PetscFunctionBegin;
2258   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2259   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2260   PetscFunctionReturn(0);
2261 }
2262
2263 #undef __FUNCT__
2264 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2265 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2266 {
2267   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2268   PetscErrorCode ierr;
2269   PetscInt       i,*idxb = 0;
2270   PetscScalar    *va,*vb;
2271   Vec            vtmp;
2272
2273   PetscFunctionBegin;
2274   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2275   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2276   if (idx) {
2277     for (i=0; i<A->rmap->n; i++) {
2278       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2279     }
2280   }
2281
2282   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2283   if (idx) {
2284     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2285   }
2286   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2287   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2288
2289   for (i=0; i<A->rmap->n; i++) {
2290     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2291       va[i] = vb[i];
2292       if (idx) idx[i] = a->garray[idxb[i]];
2293     }
2294   }
2295
2296   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2297   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2298   ierr = PetscFree(idxb);CHKERRQ(ierr);
2299   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2305 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2306 {
2307   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2308   PetscErrorCode ierr;
2309   PetscInt       i,*idxb = 0;
2310   PetscScalar    *va,*vb;
2311   Vec            vtmp;
2312
2313   PetscFunctionBegin;
2314   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2315   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2316   if (idx) {
2317     for (i=0; i<A->rmap->n; i++) {
2318       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2319     }
2320   }
2321
2322   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2323   if (idx) {
2324     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2325   }
2326   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2327   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2328
2329   for (i=0; i<A->rmap->n; i++) {
2330     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2331       va[i] = vb[i];
2332       if (idx) idx[i] = a->garray[idxb[i]];
2333     }
2334   }
2335
2336   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2337   ierr =
VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2338   ierr = PetscFree(idxb);CHKERRQ(ierr);
2339   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2340   PetscFunctionReturn(0);
2341 }
2342
2343 #undef __FUNCT__
2344 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2345 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2346 {
2347   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2348   PetscInt       n      = A->rmap->n;
2349   PetscInt       cstart = A->cmap->rstart;
2350   PetscInt       *cmap  = mat->garray;
2351   PetscInt       *diagIdx, *offdiagIdx;
2352   Vec            diagV, offdiagV;
2353   PetscScalar    *a, *diagA, *offdiagA;
2354   PetscInt       r;
2355   PetscErrorCode ierr;
2356
2357   PetscFunctionBegin;
2358   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2359   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2360   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2361   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2362   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2363   ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2364   ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2365   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2366   for (r = 0; r < n; ++r) {
2367     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2368       a[r]   = diagA[r];
2369       idx[r] = cstart + diagIdx[r];
2370     } else {
2371       a[r]   = offdiagA[r];
2372       idx[r] = cmap[offdiagIdx[r]];
2373     }
2374   }
2375   ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2376   ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2377   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2378   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2379   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2380   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2381   PetscFunctionReturn(0);
2382 }
2383
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2386 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2387 {
2388   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2389   PetscInt       n      = A->rmap->n;
2390   PetscInt       cstart = A->cmap->rstart;
2391   PetscInt       *cmap  = mat->garray;
2392   PetscInt       *diagIdx, *offdiagIdx;
2393   Vec            diagV, offdiagV;
2394   PetscScalar    *a, *diagA, *offdiagA;
2395   PetscInt       r;
2396   PetscErrorCode ierr;
2397
2398   PetscFunctionBegin;
2399   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2400   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2401   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2402   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2403   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2404   ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2405   ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2406   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2407   for (r = 0; r < n; ++r) {
2408     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2409       a[r]   = diagA[r];
2410       idx[r] = cstart + diagIdx[r];
2411     } else {
2412       a[r]   = offdiagA[r];
2413       idx[r] = cmap[offdiagIdx[r]];
2414     }
2415   }
2416   ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2417   ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2418   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2419   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2420   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2421   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2422   PetscFunctionReturn(0);
2423 }
2424
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2427 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2428 {
2429   PetscErrorCode ierr;
2430 Mat *dummy; 2431 2432 PetscFunctionBegin; 2433 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2434 *newmat = *dummy; 2435 ierr = PetscFree(dummy);CHKERRQ(ierr); 2436 PetscFunctionReturn(0); 2437 } 2438 2439 #undef __FUNCT__ 2440 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2441 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2442 { 2443 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2444 PetscErrorCode ierr; 2445 2446 PetscFunctionBegin; 2447 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2448 A->factorerrortype = a->A->factorerrortype; 2449 PetscFunctionReturn(0); 2450 } 2451 2452 #undef __FUNCT__ 2453 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2454 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2455 { 2456 PetscErrorCode ierr; 2457 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2458 2459 PetscFunctionBegin; 2460 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2461 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2462 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2463 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2464 PetscFunctionReturn(0); 2465 } 2466 2467 #undef __FUNCT__ 2468 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ" 2469 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2470 { 2471 PetscFunctionBegin; 2472 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2473 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2474 PetscFunctionReturn(0); 2475 } 2476 2477 #undef __FUNCT__ 2478 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap" 2479 /*@ 2480 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2481 2482 Collective on Mat 2483 2484 Input Parameters: 2485 + A - the matrix 2486 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2487 2488 Level: advanced 2489 2490 @*/ 2491 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2492 { 2493 PetscErrorCode ierr; 2494 2495 PetscFunctionBegin; 2496 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2497 PetscFunctionReturn(0); 2498 } 2499 2500 #undef __FUNCT__ 2501 #define __FUNCT__ "MatSetFromOptions_MPIAIJ" 2502 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2503 { 2504 PetscErrorCode ierr; 2505 PetscBool sc = PETSC_FALSE,flg; 2506 2507 PetscFunctionBegin; 2508 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2509 ierr = PetscObjectOptionsBegin((PetscObject)A); 2510 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2511 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2512 if (flg) { 2513 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2514 } 2515 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2516 PetscFunctionReturn(0); 2517 } 2518 2519 #undef __FUNCT__ 2520 #define __FUNCT__ "MatShift_MPIAIJ" 2521 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2522 { 2523 PetscErrorCode ierr; 2524 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2525 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2526 2527 PetscFunctionBegin; 2528 if (!Y->preallocated) { 2529 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2530 } else if 
(!aij->nz) { 2531 PetscInt nonew = aij->nonew; 2532 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2533 aij->nonew = nonew; 2534 } 2535 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2536 PetscFunctionReturn(0); 2537 } 2538 2539 #undef __FUNCT__ 2540 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ" 2541 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2542 { 2543 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2544 PetscErrorCode ierr; 2545 2546 PetscFunctionBegin; 2547 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2548 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2549 if (d) { 2550 PetscInt rstart; 2551 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2552 *d += rstart; 2553 2554 } 2555 PetscFunctionReturn(0); 2556 } 2557 2558 2559 /* -------------------------------------------------------------------*/ 2560 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2561 MatGetRow_MPIAIJ, 2562 MatRestoreRow_MPIAIJ, 2563 MatMult_MPIAIJ, 2564 /* 4*/ MatMultAdd_MPIAIJ, 2565 MatMultTranspose_MPIAIJ, 2566 MatMultTransposeAdd_MPIAIJ, 2567 0, 2568 0, 2569 0, 2570 /*10*/ 0, 2571 0, 2572 0, 2573 MatSOR_MPIAIJ, 2574 MatTranspose_MPIAIJ, 2575 /*15*/ MatGetInfo_MPIAIJ, 2576 MatEqual_MPIAIJ, 2577 MatGetDiagonal_MPIAIJ, 2578 MatDiagonalScale_MPIAIJ, 2579 MatNorm_MPIAIJ, 2580 /*20*/ MatAssemblyBegin_MPIAIJ, 2581 MatAssemblyEnd_MPIAIJ, 2582 MatSetOption_MPIAIJ, 2583 MatZeroEntries_MPIAIJ, 2584 /*24*/ MatZeroRows_MPIAIJ, 2585 0, 2586 0, 2587 0, 2588 0, 2589 /*29*/ MatSetUp_MPIAIJ, 2590 0, 2591 0, 2592 MatGetDiagonalBlock_MPIAIJ, 2593 0, 2594 /*34*/ MatDuplicate_MPIAIJ, 2595 0, 2596 0, 2597 0, 2598 0, 2599 /*39*/ MatAXPY_MPIAIJ, 2600 MatGetSubMatrices_MPIAIJ, 2601 MatIncreaseOverlap_MPIAIJ, 2602 MatGetValues_MPIAIJ, 2603 MatCopy_MPIAIJ, 2604 /*44*/ MatGetRowMax_MPIAIJ, 2605 MatScale_MPIAIJ, 2606 MatShift_MPIAIJ, 2607 MatDiagonalSet_MPIAIJ, 2608 MatZeroRowsColumns_MPIAIJ, 2609 /*49*/ MatSetRandom_MPIAIJ, 2610 0, 2611 0, 2612 0, 2613 0, 2614 /*54*/ MatFDColoringCreate_MPIXAIJ, 2615 0, 2616 MatSetUnfactored_MPIAIJ, 2617 MatPermute_MPIAIJ, 2618 0, 2619 /*59*/ MatGetSubMatrix_MPIAIJ, 2620 MatDestroy_MPIAIJ, 2621 MatView_MPIAIJ, 2622 0, 2623 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2624 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2625 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2626 0, 2627 0, 2628 0, 2629 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2630 MatGetRowMinAbs_MPIAIJ, 2631 0, 2632 0, 2633 0, 2634 0, 2635 /*75*/ MatFDColoringApply_AIJ, 2636 MatSetFromOptions_MPIAIJ, 2637 0, 2638 0, 2639 MatFindZeroDiagonals_MPIAIJ, 2640 /*80*/ 0, 2641 0, 2642 0, 2643 /*83*/ MatLoad_MPIAIJ, 2644 0, 2645 0, 2646 0, 2647 0, 2648 0, 2649 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2650 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2651 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2652 MatPtAP_MPIAIJ_MPIAIJ, 2653 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2654 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2655 0, 2656 0, 2657 0, 2658 0, 2659 /*99*/ 0, 2660 0, 2661 0, 2662 MatConjugate_MPIAIJ, 2663 0, 2664 /*104*/MatSetValuesRow_MPIAIJ, 2665 MatRealPart_MPIAIJ, 2666 MatImaginaryPart_MPIAIJ, 2667 0, 2668 0, 2669 /*109*/0, 2670 0, 2671 MatGetRowMin_MPIAIJ, 2672 0, 2673 MatMissingDiagonal_MPIAIJ, 2674 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2675 0, 2676 MatGetGhosts_MPIAIJ, 2677 0, 2678 0, 2679 /*119*/0, 2680 0, 2681 0, 2682 0, 2683 MatGetMultiProcBlock_MPIAIJ, 2684 /*124*/MatFindNonzeroRows_MPIAIJ, 2685 MatGetColumnNorms_MPIAIJ, 2686 MatInvertBlockDiagonal_MPIAIJ, 2687 0, 2688 
MatGetSubMatricesMPI_MPIAIJ, 2689 /*129*/0, 2690 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2691 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2692 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2693 0, 2694 /*134*/0, 2695 0, 2696 0, 2697 0, 2698 0, 2699 /*139*/0, 2700 0, 2701 0, 2702 MatFDColoringSetUp_MPIXAIJ, 2703 MatFindOffBlockDiagonalEntries_MPIAIJ, 2704 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2705 }; 2706 2707 /* ----------------------------------------------------------------------------------------*/ 2708 2709 #undef __FUNCT__ 2710 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2711 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2712 { 2713 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2714 PetscErrorCode ierr; 2715 2716 PetscFunctionBegin; 2717 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2718 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2719 PetscFunctionReturn(0); 2720 } 2721 2722 #undef __FUNCT__ 2723 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2724 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2725 { 2726 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2727 PetscErrorCode ierr; 2728 2729 PetscFunctionBegin; 2730 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2731 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2732 PetscFunctionReturn(0); 2733 } 2734 2735 #undef __FUNCT__ 2736 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2737 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2738 { 2739 Mat_MPIAIJ *b; 2740 PetscErrorCode ierr; 2741 2742 PetscFunctionBegin; 2743 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2744 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2745 b = (Mat_MPIAIJ*)B->data; 2746 2747 #if defined(PETSC_USE_CTABLE) 2748 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2749 #else 2750 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2751 #endif 2752 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2753 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2754 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2755 2756 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2757 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2758 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2759 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2760 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2761 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2762 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2763 2764 if (!B->preallocated) { 2765 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2766 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2767 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2768 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2769 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2770 } 2771 2772 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2773 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2774 B->preallocated = PETSC_TRUE; 2775 B->was_assembled = PETSC_FALSE; 2776 B->assembled = PETSC_FALSE;; 2777 PetscFunctionReturn(0); 2778 } 2779 2780 #undef __FUNCT__ 2781 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2782 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2783 { 2784 Mat mat; 2785 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2786 PetscErrorCode ierr; 2787 2788 PetscFunctionBegin; 2789 *newmat = 0; 2790 ierr = 
MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2791 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2792 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2793 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2794 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2795 a = (Mat_MPIAIJ*)mat->data; 2796 2797 mat->factortype = matin->factortype; 2798 mat->assembled = PETSC_TRUE; 2799 mat->insertmode = NOT_SET_VALUES; 2800 mat->preallocated = PETSC_TRUE; 2801 2802 a->size = oldmat->size; 2803 a->rank = oldmat->rank; 2804 a->donotstash = oldmat->donotstash; 2805 a->roworiented = oldmat->roworiented; 2806 a->rowindices = 0; 2807 a->rowvalues = 0; 2808 a->getrowactive = PETSC_FALSE; 2809 2810 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2811 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2812 2813 if (oldmat->colmap) { 2814 #if defined(PETSC_USE_CTABLE) 2815 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2816 #else 2817 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2818 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2819 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2820 #endif 2821 } else a->colmap = 0; 2822 if (oldmat->garray) { 2823 PetscInt len; 2824 len = oldmat->B->cmap->n; 2825 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2826 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2827 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2828 } else a->garray = 0; 2829 2830 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2831 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2832 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2833 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2834 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2835 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2836 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2837 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2838 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2839 *newmat = mat; 2840 PetscFunctionReturn(0); 2841 } 2842 2843 2844 2845 #undef __FUNCT__ 2846 #define __FUNCT__ "MatLoad_MPIAIJ" 2847 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2848 { 2849 PetscScalar *vals,*svals; 2850 MPI_Comm comm; 2851 PetscErrorCode ierr; 2852 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2853 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2854 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2855 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2856 PetscInt cend,cstart,n,*rowners; 2857 int fd; 2858 PetscInt bs = newMat->rmap->bs; 2859 2860 PetscFunctionBegin; 2861 /* force binary viewer to load .info file if it has not yet done so */ 2862 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2863 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2864 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2865 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2866 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2867 if (!rank) { 2868 ierr = 
PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2869 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2870 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2871 } 2872 2873 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2874 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2875 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2876 if (bs < 0) bs = 1; 2877 2878 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2879 M = header[1]; N = header[2]; 2880 2881 /* If global sizes are set, check if they are consistent with that given in the file */ 2882 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2883 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2884 2885 /* determine ownership of all (block) rows */ 2886 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2887 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2888 else m = newMat->rmap->n; /* Set by user */ 2889 2890 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2891 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2892 2893 /* First process needs enough room for process with most rows */ 2894 if (!rank) { 2895 mmax = rowners[1]; 2896 for (i=2; i<=size; i++) { 2897 mmax = PetscMax(mmax, rowners[i]); 2898 } 2899 } else mmax = -1; /* unused, but compilers complain */ 2900 2901 rowners[0] = 0; 2902 for (i=2; i<=size; i++) { 2903 rowners[i] += rowners[i-1]; 2904 } 2905 rstart = rowners[rank]; 2906 rend = rowners[rank+1]; 2907 2908 /* distribute row lengths to all processors */ 2909 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2910 if (!rank) { 2911 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2912 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2913 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2914 for (j=0; j<m; j++) { 2915 procsnz[0] += ourlens[j]; 2916 } 2917 for (i=1; i<size; i++) { 2918 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2919 /* calculate the number of nonzeros on each processor */ 2920 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2921 procsnz[i] += rowlengths[j]; 2922 } 2923 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2924 } 2925 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2926 } else { 2927 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2928 } 2929 2930 if (!rank) { 2931 /* determine max buffer needed and allocate it */ 2932 maxnz = 0; 2933 for (i=0; i<size; i++) { 2934 maxnz = PetscMax(maxnz,procsnz[i]); 2935 } 2936 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2937 2938 /* read in my part of the matrix column indices */ 2939 nz = procsnz[0]; 2940 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2941 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2942 2943 /* read in every one elses and ship off */ 2944 for (i=1; i<size; i++) { 
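         /* Descriptive note: rank 0 reuses the single buffer sized for the largest chunk,
            reading process i's column indices from the file and shipping them with
            MPIULong_Send(), whose count argument is a PetscInt so that chunks longer than a
            standard int message count can be moved; the matching MPIULong_Recv() is in the
            else branch below. */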
2945 nz = procsnz[i]; 2946 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2947 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2948 } 2949 ierr = PetscFree(cols);CHKERRQ(ierr); 2950 } else { 2951 /* determine buffer space needed for message */ 2952 nz = 0; 2953 for (i=0; i<m; i++) { 2954 nz += ourlens[i]; 2955 } 2956 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2957 2958 /* receive message of column indices*/ 2959 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2960 } 2961 2962 /* determine column ownership if matrix is not square */ 2963 if (N != M) { 2964 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2965 else n = newMat->cmap->n; 2966 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2967 cstart = cend - n; 2968 } else { 2969 cstart = rstart; 2970 cend = rend; 2971 n = cend - cstart; 2972 } 2973 2974 /* loop over local rows, determining number of off diagonal entries */ 2975 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2976 jj = 0; 2977 for (i=0; i<m; i++) { 2978 for (j=0; j<ourlens[i]; j++) { 2979 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2980 jj++; 2981 } 2982 } 2983 2984 for (i=0; i<m; i++) { 2985 ourlens[i] -= offlens[i]; 2986 } 2987 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2988 2989 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2990 2991 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2992 2993 for (i=0; i<m; i++) { 2994 ourlens[i] += offlens[i]; 2995 } 2996 2997 if (!rank) { 2998 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2999 3000 /* read in my part of the matrix numerical values */ 3001 nz = procsnz[0]; 3002 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3003 3004 /* insert into matrix */ 3005 jj = rstart; 3006 smycols = mycols; 3007 svals = vals; 3008 for (i=0; i<m; i++) { 3009 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3010 smycols += ourlens[i]; 3011 svals += ourlens[i]; 3012 jj++; 3013 } 3014 3015 /* read in other processors and ship out */ 3016 for (i=1; i<size; i++) { 3017 nz = procsnz[i]; 3018 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3019 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3020 } 3021 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3022 } else { 3023 /* receive numeric values */ 3024 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3025 3026 /* receive message of values*/ 3027 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3028 3029 /* insert into matrix */ 3030 jj = rstart; 3031 smycols = mycols; 3032 svals = vals; 3033 for (i=0; i<m; i++) { 3034 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3035 smycols += ourlens[i]; 3036 svals += ourlens[i]; 3037 jj++; 3038 } 3039 } 3040 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3041 ierr = PetscFree(vals);CHKERRQ(ierr); 3042 ierr = PetscFree(mycols);CHKERRQ(ierr); 3043 ierr = PetscFree(rowners);CHKERRQ(ierr); 3044 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3045 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3046 PetscFunctionReturn(0); 3047 } 3048 3049 #undef __FUNCT__ 3050 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3051 /* TODO: Not scalable because of ISAllGather() unless getting all columns. 
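   (ISAllGather() gathers the full column index set of iscol onto every process, so per-process memory grows with the global number of selected columns; the all-columns case below is detected and handled with a stride index set instead.)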
*/ 3052 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3053 { 3054 PetscErrorCode ierr; 3055 IS iscol_local; 3056 PetscInt csize; 3057 3058 PetscFunctionBegin; 3059 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3060 if (call == MAT_REUSE_MATRIX) { 3061 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3062 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3063 } else { 3064 /* check if we are grabbing all columns*/ 3065 PetscBool isstride; 3066 PetscMPIInt lisstride = 0,gisstride; 3067 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3068 if (isstride) { 3069 PetscInt start,len,mstart,mlen; 3070 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3071 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3072 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3073 if (mstart == start && mlen-mstart == len) lisstride = 1; 3074 } 3075 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3076 if (gisstride) { 3077 PetscInt N; 3078 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3079 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3080 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3081 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3082 } else { 3083 PetscInt cbs; 3084 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3085 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3086 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3087 } 3088 } 3089 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3090 if (call == MAT_INITIAL_MATRIX) { 3091 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3092 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3093 } 3094 PetscFunctionReturn(0); 3095 } 3096 3097 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3098 #undef __FUNCT__ 3099 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3100 /* 3101 Not great since it makes two copies of the submatrix, first an SeqAIJ 3102 in local and then by concatenating the local matrices the end result. 3103 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3104 3105 Note: This requires a sequential iscol with all indices. 
3106 */ 3107 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3108 { 3109 PetscErrorCode ierr; 3110 PetscMPIInt rank,size; 3111 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3112 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3113 PetscBool allcolumns, colflag; 3114 Mat M,Mreuse; 3115 MatScalar *vwork,*aa; 3116 MPI_Comm comm; 3117 Mat_SeqAIJ *aij; 3118 3119 PetscFunctionBegin; 3120 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3121 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3122 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3123 3124 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3125 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3126 if (colflag && ncol == mat->cmap->N) { 3127 allcolumns = PETSC_TRUE; 3128 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3129 } else { 3130 allcolumns = PETSC_FALSE; 3131 } 3132 if (call == MAT_REUSE_MATRIX) { 3133 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3134 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3135 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3136 } else { 3137 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3138 } 3139 3140 /* 3141 m - number of local rows 3142 n - number of columns (same on all processors) 3143 rstart - first row in new global matrix generated 3144 */ 3145 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3146 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3147 if (call == MAT_INITIAL_MATRIX) { 3148 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3149 ii = aij->i; 3150 jj = aij->j; 3151 3152 /* 3153 Determine the number of non-zeros in the diagonal and off-diagonal 3154 portions of the matrix in order to do correct preallocation 3155 */ 3156 3157 /* first get start and end of "diagonal" columns */ 3158 if (csize == PETSC_DECIDE) { 3159 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3160 if (mglobal == n) { /* square matrix */ 3161 nlocal = m; 3162 } else { 3163 nlocal = n/size + ((n % size) > rank); 3164 } 3165 } else { 3166 nlocal = csize; 3167 } 3168 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3169 rstart = rend - nlocal; 3170 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3171 3172 /* next, compute all the lengths */ 3173 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3174 olens = dlens + m; 3175 for (i=0; i<m; i++) { 3176 jend = ii[i+1] - ii[i]; 3177 olen = 0; 3178 dlen = 0; 3179 for (j=0; j<jend; j++) { 3180 if (*jj < rstart || *jj >= rend) olen++; 3181 else dlen++; 3182 jj++; 3183 } 3184 olens[i] = olen; 3185 dlens[i] = dlen; 3186 } 3187 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3188 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3189 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3190 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3191 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3192 ierr = PetscFree(dlens);CHKERRQ(ierr); 3193 } else { 3194 PetscInt ml,nl; 3195 3196 M = *newmat; 3197 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3198 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same 
size/layout as request"); 3199 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3200 /* 3201 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3202 rather than the slower MatSetValues(). 3203 */ 3204 M->was_assembled = PETSC_TRUE; 3205 M->assembled = PETSC_FALSE; 3206 } 3207 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3208 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3209 ii = aij->i; 3210 jj = aij->j; 3211 aa = aij->a; 3212 for (i=0; i<m; i++) { 3213 row = rstart + i; 3214 nz = ii[i+1] - ii[i]; 3215 cwork = jj; jj += nz; 3216 vwork = aa; aa += nz; 3217 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3218 } 3219 3220 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3221 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3222 *newmat = M; 3223 3224 /* save submatrix used in processor for next request */ 3225 if (call == MAT_INITIAL_MATRIX) { 3226 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3227 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3228 } 3229 PetscFunctionReturn(0); 3230 } 3231 3232 #undef __FUNCT__ 3233 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3234 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3235 { 3236 PetscInt m,cstart, cend,j,nnz,i,d; 3237 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3238 const PetscInt *JJ; 3239 PetscScalar *values; 3240 PetscErrorCode ierr; 3241 3242 PetscFunctionBegin; 3243 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3244 3245 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3246 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3247 m = B->rmap->n; 3248 cstart = B->cmap->rstart; 3249 cend = B->cmap->rend; 3250 rstart = B->rmap->rstart; 3251 3252 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3253 3254 #if defined(PETSC_USE_DEBUGGING) 3255 for (i=0; i<m; i++) { 3256 nnz = Ii[i+1]- Ii[i]; 3257 JJ = J + Ii[i]; 3258 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3259 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3260 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3261 } 3262 #endif 3263 3264 for (i=0; i<m; i++) { 3265 nnz = Ii[i+1]- Ii[i]; 3266 JJ = J + Ii[i]; 3267 nnz_max = PetscMax(nnz_max,nnz); 3268 d = 0; 3269 for (j=0; j<nnz; j++) { 3270 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3271 } 3272 d_nnz[i] = d; 3273 o_nnz[i] = nnz - d; 3274 } 3275 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3276 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3277 3278 if (v) values = (PetscScalar*)v; 3279 else { 3280 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3281 } 3282 3283 for (i=0; i<m; i++) { 3284 ii = i + rstart; 3285 nnz = Ii[i+1]- Ii[i]; 3286 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3287 } 3288 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3289 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3290 3291 if (!v) { 3292 ierr = PetscFree(values);CHKERRQ(ierr); 3293 } 3294 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3295 PetscFunctionReturn(0); 3296 } 3297 3298 #undef __FUNCT__ 3299 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3300 /*@ 3301 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3302 (the default parallel PETSc format). 3303 3304 Collective on MPI_Comm 3305 3306 Input Parameters: 3307 + B - the matrix 3308 . i - the indices into j for the start of each local row (starts with zero) 3309 . j - the column indices for each local row (starts with zero) 3310 - v - optional values in the matrix 3311 3312 Level: developer 3313 3314 Notes: 3315 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3316 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3317 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3318 3319 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3320 3321 The format which is used for the sparse matrix input, is equivalent to a 3322 row-major ordering.. i.e for the following matrix, the input data expected is 3323 as shown 3324 3325 $ 1 0 0 3326 $ 2 0 3 P0 3327 $ ------- 3328 $ 4 5 6 P1 3329 $ 3330 $ Process0 [P0]: rows_owned=[0,1] 3331 $ i = {0,1,3} [size = nrow+1 = 2+1] 3332 $ j = {0,0,2} [size = 3] 3333 $ v = {1,2,3} [size = 3] 3334 $ 3335 $ Process1 [P1]: rows_owned=[2] 3336 $ i = {0,3} [size = nrow+1 = 1+1] 3337 $ j = {0,1,2} [size = 3] 3338 $ v = {4,5,6} [size = 3] 3339 3340 .keywords: matrix, aij, compressed row, sparse, parallel 3341 3342 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3343 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3344 @*/ 3345 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3346 { 3347 PetscErrorCode ierr; 3348 3349 PetscFunctionBegin; 3350 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3351 PetscFunctionReturn(0); 3352 } 3353 3354 #undef __FUNCT__ 3355 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3356 /*@C 3357 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3358 (the default parallel PETSc format). For good matrix assembly performance 3359 the user should preallocate the matrix storage by setting the parameters 3360 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3361 performance can be increased by more than a factor of 50. 3362 3363 Collective on MPI_Comm 3364 3365 Input Parameters: 3366 + B - the matrix 3367 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3368 (same value is used for all local rows) 3369 . d_nnz - array containing the number of nonzeros in the various rows of the 3370 DIAGONAL portion of the local submatrix (possibly different for each row) 3371 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3372 The size of this array is equal to the number of local rows, i.e 'm'. 
3373 For matrices that will be factored, you must leave room for (and set) 3374 the diagonal entry even if it is zero. 3375 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3376 submatrix (same value is used for all local rows). 3377 - o_nnz - array containing the number of nonzeros in the various rows of the 3378 OFF-DIAGONAL portion of the local submatrix (possibly different for 3379 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3380 structure. The size of this array is equal to the number 3381 of local rows, i.e 'm'. 3382 3383 If the *_nnz parameter is given then the *_nz parameter is ignored 3384 3385 The AIJ format (also called the Yale sparse matrix format or 3386 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3387 storage. The stored row and column indices begin with zero. 3388 See Users-Manual: ch_mat for details. 3389 3390 The parallel matrix is partitioned such that the first m0 rows belong to 3391 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3392 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3393 3394 The DIAGONAL portion of the local submatrix of a processor can be defined 3395 as the submatrix which is obtained by extraction the part corresponding to 3396 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3397 first row that belongs to the processor, r2 is the last row belonging to 3398 the this processor, and c1-c2 is range of indices of the local part of a 3399 vector suitable for applying the matrix to. This is an mxn matrix. In the 3400 common case of a square matrix, the row and column ranges are the same and 3401 the DIAGONAL part is also square. The remaining portion of the local 3402 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3403 3404 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3405 3406 You can call MatGetInfo() to get information on how effective the preallocation was; 3407 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3408 You can also run with the option -info and look for messages with the string 3409 malloc in them to see if additional memory allocation was needed. 3410 3411 Example usage: 3412 3413 Consider the following 8x8 matrix with 34 non-zero values, that is 3414 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3415 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3416 as follows: 3417 3418 .vb 3419 1 2 0 | 0 3 0 | 0 4 3420 Proc0 0 5 6 | 7 0 0 | 8 0 3421 9 0 10 | 11 0 0 | 12 0 3422 ------------------------------------- 3423 13 0 14 | 15 16 17 | 0 0 3424 Proc1 0 18 0 | 19 20 21 | 0 0 3425 0 0 0 | 22 23 0 | 24 0 3426 ------------------------------------- 3427 Proc2 25 26 27 | 0 0 28 | 29 0 3428 30 0 0 | 31 32 33 | 0 34 3429 .ve 3430 3431 This can be represented as a collection of submatrices as: 3432 3433 .vb 3434 A B C 3435 D E F 3436 G H I 3437 .ve 3438 3439 Where the submatrices A,B,C are owned by proc0, D,E,F are 3440 owned by proc1, G,H,I are owned by proc2. 3441 3442 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3443 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3444 The 'M','N' parameters are 8,8, and have the same values on all procs. 3445 3446 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3447 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3448 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3449 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3450 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 3451 matrix, and [DF] as another SeqAIJ matrix. 3452 3453 When d_nz, o_nz parameters are specified, d_nz storage elements are 3454 allocated for every row of the local diagonal submatrix, and o_nz 3455 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3456 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3457 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3458 In this case, the values of d_nz,o_nz are: 3459 .vb 3460 proc0 : d_nz = 2, o_nz = 2 3461 proc1 : d_nz = 3, o_nz = 2 3462 proc2 : d_nz = 1, o_nz = 4 3463 .ve 3464 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3465 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3466 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3467 34 values. 3468 3469 When d_nnz, o_nnz parameters are specified, the storage is specified 3470 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3471 In the above case the values for d_nnz,o_nnz are: 3472 .vb 3473 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3474 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3475 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3476 .ve 3477 Here the space allocated is the sum of all the above values, i.e. 34, and 3478 hence the preallocation is exact. 3479 3480 Level: intermediate 3481 3482 .keywords: matrix, aij, compressed row, sparse, parallel 3483 3484 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3485 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 3486 @*/ 3487 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3488 { 3489 PetscErrorCode ierr; 3490 3491 PetscFunctionBegin; 3492 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3493 PetscValidType(B,1); 3494 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3495 PetscFunctionReturn(0); 3496 } 3497 3498 #undef __FUNCT__ 3499 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3500 /*@ 3501 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows 3502 in standard CSR format. 3503 3504 Collective on MPI_Comm 3505 3506 Input Parameters: 3507 + comm - MPI communicator 3508 . m - number of local rows (Cannot be PETSC_DECIDE) 3509 . n - This value should be the same as the local size used in creating the 3510 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3511 calculated if N is given) For square matrices n is almost always m. 3512 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3513 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3514 . i - row indices 3515 . j - column indices 3516 - a - matrix values 3517 3518 Output Parameter: 3519 . mat - the matrix 3520 3521 Level: intermediate 3522 3523 Notes: 3524 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3525 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3526 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3527 3528 The i and j indices are 0 based, and the i indices are offsets into the local j array.
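   A minimal calling sketch (assuming the two-process layout illustrated below, with each rank passing the local CSR arrays i, j, and a for the rows it owns; m is 2 on process 0 and 1 on process 1):

.vb
      Mat A;
      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
      /* ... use A, then ... */
      ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve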
3529 3530 The format which is used for the sparse matrix input, is equivalent to a 3531 row-major ordering.. i.e for the following matrix, the input data expected is 3532 as shown 3533 3534 $ 1 0 0 3535 $ 2 0 3 P0 3536 $ ------- 3537 $ 4 5 6 P1 3538 $ 3539 $ Process0 [P0]: rows_owned=[0,1] 3540 $ i = {0,1,3} [size = nrow+1 = 2+1] 3541 $ j = {0,0,2} [size = 3] 3542 $ v = {1,2,3} [size = 3] 3543 $ 3544 $ Process1 [P1]: rows_owned=[2] 3545 $ i = {0,3} [size = nrow+1 = 1+1] 3546 $ j = {0,1,2} [size = 3] 3547 $ v = {4,5,6} [size = 3] 3548 3549 .keywords: matrix, aij, compressed row, sparse, parallel 3550 3551 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3552 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3553 @*/ 3554 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3555 { 3556 PetscErrorCode ierr; 3557 3558 PetscFunctionBegin; 3559 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3560 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3561 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3562 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3563 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3564 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3565 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3566 PetscFunctionReturn(0); 3567 } 3568 3569 #undef __FUNCT__ 3570 #define __FUNCT__ "MatCreateAIJ" 3571 /*@C 3572 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3573 (the default parallel PETSc format). For good matrix assembly performance 3574 the user should preallocate the matrix storage by setting the parameters 3575 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3576 performance can be increased by more than a factor of 50. 3577 3578 Collective on MPI_Comm 3579 3580 Input Parameters: 3581 + comm - MPI communicator 3582 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3583 This value should be the same as the local size used in creating the 3584 y vector for the matrix-vector product y = Ax. 3585 . n - This value should be the same as the local size used in creating the 3586 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3587 calculated if N is given) For square matrices n is almost always m. 3588 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3589 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3590 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3591 (same value is used for all local rows) 3592 . d_nnz - array containing the number of nonzeros in the various rows of the 3593 DIAGONAL portion of the local submatrix (possibly different for each row) 3594 or NULL, if d_nz is used to specify the nonzero structure. 3595 The size of this array is equal to the number of local rows, i.e 'm'. 3596 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3597 submatrix (same value is used for all local rows). 3598 - o_nnz - array containing the number of nonzeros in the various rows of the 3599 OFF-DIAGONAL portion of the local submatrix (possibly different for 3600 each row) or NULL, if o_nz is used to specify the nonzero 3601 structure. 
The size of this array is equal to the number 3602 of local rows, i.e 'm'. 3603 3604 Output Parameter: 3605 . A - the matrix 3606 3607 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3608 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3609 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3610 3611 Notes: 3612 If the *_nnz parameter is given then the *_nz parameter is ignored 3613 3614 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3615 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3616 storage requirements for this matrix. 3617 3618 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3619 processor than it must be used on all processors that share the object for 3620 that argument. 3621 3622 The user MUST specify either the local or global matrix dimensions 3623 (possibly both). 3624 3625 The parallel matrix is partitioned across processors such that the 3626 first m0 rows belong to process 0, the next m1 rows belong to 3627 process 1, the next m2 rows belong to process 2 etc.. where 3628 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3629 values corresponding to [m x N] submatrix. 3630 3631 The columns are logically partitioned with the n0 columns belonging 3632 to 0th partition, the next n1 columns belonging to the next 3633 partition etc.. where n0,n1,n2... are the input parameter 'n'. 3634 3635 The DIAGONAL portion of the local submatrix on any given processor 3636 is the submatrix corresponding to the rows and columns m,n 3637 corresponding to the given processor. i.e diagonal matrix on 3638 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3639 etc. The remaining portion of the local submatrix [m x (N-n)] 3640 constitute the OFF-DIAGONAL portion. The example below better 3641 illustrates this concept. 3642 3643 For a square global matrix we define each processor's diagonal portion 3644 to be its local rows and the corresponding columns (a square submatrix); 3645 each processor's off-diagonal portion encompasses the remainder of the 3646 local matrix (a rectangular submatrix). 3647 3648 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3649 3650 When calling this routine with a single process communicator, a matrix of 3651 type SEQAIJ is returned. If a matrix of type MATMPIAIJ is desired for this 3652 type of communicator, use the construction mechanism: 3653 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3654 3655 By default, this format uses inodes (identical nodes) when possible. 3656 We search for consecutive rows with the same nonzero structure, thereby 3657 reusing matrix information to achieve increased efficiency. 3658 3659 Options Database Keys: 3660 + -mat_no_inode - Do not use inodes 3661 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3662 - -mat_aij_oneindex - Internally use indexing starting at 1 3663 rather than 0. Note that when calling MatSetValues(), 3664 the user still MUST index entries starting at 0! 3665 3666 3667 Example usage: 3668 3669 Consider the following 8x8 matrix with 34 non-zero values, that is 3670 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3671 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 3672 as follows: 3673 3674 .vb 3675 1 2 0 | 0 3 0 | 0 4 3676 Proc0 0 5 6 | 7 0 0 | 8 0 3677 9 0 10 | 11 0 0 | 12 0 3678 ------------------------------------- 3679 13 0 14 | 15 16 17 | 0 0 3680 Proc1 0 18 0 | 19 20 21 | 0 0 3681 0 0 0 | 22 23 0 | 24 0 3682 ------------------------------------- 3683 Proc2 25 26 27 | 0 0 28 | 29 0 3684 30 0 0 | 31 32 33 | 0 34 3685 .ve 3686 3687 This can be represented as a collection of submatrices as: 3688 3689 .vb 3690 A B C 3691 D E F 3692 G H I 3693 .ve 3694 3695 Where the submatrices A,B,C are owned by proc0, D,E,F are 3696 owned by proc1, G,H,I are owned by proc2. 3697 3698 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3699 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3700 The 'M','N' parameters are 8,8, and have the same values on all procs. 3701 3702 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3703 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3704 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3705 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3706 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 3707 matrix, and [DF] as another SeqAIJ matrix. 3708 3709 When d_nz, o_nz parameters are specified, d_nz storage elements are 3710 allocated for every row of the local diagonal submatrix, and o_nz 3711 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3712 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3713 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3714 In this case, the values of d_nz,o_nz are: 3715 .vb 3716 proc0 : d_nz = 2, o_nz = 2 3717 proc1 : d_nz = 3, o_nz = 2 3718 proc2 : d_nz = 1, o_nz = 4 3719 .ve 3720 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3721 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3722 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3723 34 values. 3724 3725 When d_nnz, o_nnz parameters are specified, the storage is specified 3726 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3727 In the above case the values for d_nnz,o_nnz are: 3728 .vb 3729 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3730 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3731 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3732 .ve 3733 Here the space allocated is the sum of all the above values, i.e. 34, and 3734 hence the preallocation is exact.
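   A usage sketch for the example above (assuming each process passes its local row count for both m and n, and its own per-row arrays, e.g. d_nnz = {3,3,2} and o_nnz = {2,1,1} on proc1):

.vb
      Mat A;
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,m,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
      /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
      ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve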
3735 3736 Level: intermediate 3737 3738 .keywords: matrix, aij, compressed row, sparse, parallel 3739 3740 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3741 MATMPIAIJ, MatCreateMPIAIJWithArrays() 3742 @*/ 3743 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3744 { 3745 PetscErrorCode ierr; 3746 PetscMPIInt size; 3747 3748 PetscFunctionBegin; 3749 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3750 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3751 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3752 if (size > 1) { 3753 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3754 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3755 } else { 3756 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3757 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3758 } 3759 PetscFunctionReturn(0); 3760 } 3761 3762 #undef __FUNCT__ 3763 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3764 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3765 { 3766 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3767 PetscBool flg; 3768 PetscErrorCode ierr; 3769 3770 PetscFunctionBegin; 3771 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 3772 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 3773 if (Ad) *Ad = a->A; 3774 if (Ao) *Ao = a->B; 3775 if (colmap) *colmap = a->garray; 3776 PetscFunctionReturn(0); 3777 } 3778 3779 #undef __FUNCT__ 3780 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3781 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3782 { 3783 PetscErrorCode ierr; 3784 PetscInt m,N,i,rstart,nnz,Ii; 3785 PetscInt *indx; 3786 PetscScalar *values; 3787 3788 PetscFunctionBegin; 3789 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3790 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3791 PetscInt *dnz,*onz,sum,bs,cbs; 3792 3793 if (n == PETSC_DECIDE) { 3794 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3795 } 3796 /* Check sum(n) = N */ 3797 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3798 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3799 3800 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3801 rstart -= m; 3802 3803 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3804 for (i=0; i<m; i++) { 3805 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3806 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3807 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3808 } 3809 3810 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3811 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3812 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3813 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3814 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3815 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3816 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3817 } 3818 3819 /* numeric phase */ 3820 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3821 for (i=0; i<m; i++) { 3822 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3823 Ii = i + rstart; 3824 
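/* Ii is the global row number of local row i of inmat; insert that row into the parallel matrix */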
ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3825 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3826 } 3827 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3828 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3829 PetscFunctionReturn(0); 3830 } 3831 3832 #undef __FUNCT__ 3833 #define __FUNCT__ "MatFileSplit" 3834 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3835 { 3836 PetscErrorCode ierr; 3837 PetscMPIInt rank; 3838 PetscInt m,N,i,rstart,nnz; 3839 size_t len; 3840 const PetscInt *indx; 3841 PetscViewer out; 3842 char *name; 3843 Mat B; 3844 const PetscScalar *values; 3845 3846 PetscFunctionBegin; 3847 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3848 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3849 /* Should this be the type of the diagonal block of A? */ 3850 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3851 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3852 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3853 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3854 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3855 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3856 for (i=0; i<m; i++) { 3857 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3858 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3859 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3860 } 3861 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3862 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3863 3864 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3865 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3866 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3867 sprintf(name,"%s.%d",outfile,rank); 3868 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3869 ierr = PetscFree(name);CHKERRQ(ierr); 3870 ierr = MatView(B,out);CHKERRQ(ierr); 3871 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3872 ierr = MatDestroy(&B);CHKERRQ(ierr); 3873 PetscFunctionReturn(0); 3874 } 3875 3876 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3877 #undef __FUNCT__ 3878 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3879 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3880 { 3881 PetscErrorCode ierr; 3882 Mat_Merge_SeqsToMPI *merge; 3883 PetscContainer container; 3884 3885 PetscFunctionBegin; 3886 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3887 if (container) { 3888 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3889 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3890 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3891 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3892 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3893 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3894 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3895 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3896 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3897 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3898 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3899 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3900 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3901 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3902 ierr = PetscFree(merge);CHKERRQ(ierr); 3903 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3904 } 3905 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3906 PetscFunctionReturn(0); 3907 } 3908 3909 
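/*
   The routines below implement the two phases behind MatCreateMPIAIJSumSeqAIJ(): a symbolic phase
   (MatCreateMPIAIJSumSeqAIJSymbolic()) that merges the nonzero structures of the per-process sequential
   matrices into a MATMPIAIJ, and a numeric phase (MatCreateMPIAIJSumSeqAIJNumeric()) that sums the values.
   A minimal usage sketch, assuming seqmat is an assembled MATSEQAIJ with the same global dimensions on
   every process of PETSC_COMM_WORLD:

      Mat C;
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);

   The second call reuses the structure computed by the first and only re-sums the (possibly updated)
   numerical values of seqmat.
*/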
#include <../src/mat/utils/freespace.h> 3910 #include <petscbt.h> 3911 3912 #undef __FUNCT__ 3913 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3914 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3915 { 3916 PetscErrorCode ierr; 3917 MPI_Comm comm; 3918 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3919 PetscMPIInt size,rank,taga,*len_s; 3920 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3921 PetscInt proc,m; 3922 PetscInt **buf_ri,**buf_rj; 3923 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3924 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3925 MPI_Request *s_waits,*r_waits; 3926 MPI_Status *status; 3927 MatScalar *aa=a->a; 3928 MatScalar **abuf_r,*ba_i; 3929 Mat_Merge_SeqsToMPI *merge; 3930 PetscContainer container; 3931 3932 PetscFunctionBegin; 3933 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3934 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3935 3936 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3937 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3938 3939 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3940 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3941 3942 bi = merge->bi; 3943 bj = merge->bj; 3944 buf_ri = merge->buf_ri; 3945 buf_rj = merge->buf_rj; 3946 3947 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 3948 owners = merge->rowmap->range; 3949 len_s = merge->len_s; 3950 3951 /* send and recv matrix values */ 3952 /*-----------------------------*/ 3953 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3954 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3955 3956 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 3957 for (proc=0,k=0; proc<size; proc++) { 3958 if (!len_s[proc]) continue; 3959 i = owners[proc]; 3960 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3961 k++; 3962 } 3963 3964 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3965 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3966 ierr = PetscFree(status);CHKERRQ(ierr); 3967 3968 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3969 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3970 3971 /* insert mat values of mpimat */ 3972 /*----------------------------*/ 3973 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 3974 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 3975 3976 for (k=0; k<merge->nrecv; k++) { 3977 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3978 nrows = *(buf_ri_k[k]); 3979 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3980 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 3981 } 3982 3983 /* set values of ba */ 3984 m = merge->rowmap->n; 3985 for (i=0; i<m; i++) { 3986 arow = owners[rank] + i; 3987 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3988 bnzi = bi[i+1] - bi[i]; 3989 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 3990 3991 /* add local non-zero vals of this proc's seqmat into ba */ 3992 anzi = ai[arow+1] - ai[arow]; 3993 aj = a->j + ai[arow]; 3994 aa = a->a + ai[arow]; 3995 nextaj = 0; 3996 for (j=0; nextaj<anzi; j++) { 3997 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 3998 ba_i[j] += aa[nextaj++]; 3999 } 4000 } 4001 4002 /* add received 
vals into ba */ 4003 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4004 /* i-th row */ 4005 if (i == *nextrow[k]) { 4006 anzi = *(nextai[k]+1) - *nextai[k]; 4007 aj = buf_rj[k] + *(nextai[k]); 4008 aa = abuf_r[k] + *(nextai[k]); 4009 nextaj = 0; 4010 for (j=0; nextaj<anzi; j++) { 4011 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4012 ba_i[j] += aa[nextaj++]; 4013 } 4014 } 4015 nextrow[k]++; nextai[k]++; 4016 } 4017 } 4018 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4019 } 4020 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4021 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4022 4023 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4024 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4025 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4026 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4027 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4028 PetscFunctionReturn(0); 4029 } 4030 4031 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4032 4033 #undef __FUNCT__ 4034 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4035 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4036 { 4037 PetscErrorCode ierr; 4038 Mat B_mpi; 4039 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4040 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4041 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4042 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4043 PetscInt len,proc,*dnz,*onz,bs,cbs; 4044 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4045 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4046 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4047 MPI_Status *status; 4048 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4049 PetscBT lnkbt; 4050 Mat_Merge_SeqsToMPI *merge; 4051 PetscContainer container; 4052 4053 PetscFunctionBegin; 4054 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4055 4056 /* make sure it is a PETSc comm */ 4057 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4058 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4059 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4060 4061 ierr = PetscNew(&merge);CHKERRQ(ierr); 4062 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4063 4064 /* determine row ownership */ 4065 /*---------------------------------------------------------*/ 4066 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4067 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4068 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4069 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4070 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4071 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4072 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4073 4074 m = merge->rowmap->n; 4075 owners = merge->rowmap->range; 4076 4077 /* determine the number of messages to send, their lengths */ 4078 /*---------------------------------------------------------*/ 4079 len_s = merge->len_s; 4080 4081 len = 0; /* length of buf_si[] */ 4082 merge->nsend = 0; 4083 for (proc=0; proc<size; proc++) { 4084 len_si[proc] = 0; 4085 if (proc == rank) { 4086 len_s[proc] = 0; 4087 } else { 4088 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4089 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4090 } 4091 if (len_s[proc]) { 4092 merge->nsend++; 4093 nrows = 0; 4094 for 
(i=owners[proc]; i<owners[proc+1]; i++) { 4095 if (ai[i+1] > ai[i]) nrows++; 4096 } 4097 len_si[proc] = 2*(nrows+1); 4098 len += len_si[proc]; 4099 } 4100 } 4101 4102 /* determine the number and length of messages to receive for ij-structure */ 4103 /*-------------------------------------------------------------------------*/ 4104 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4105 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4106 4107 /* post the Irecv of j-structure */ 4108 /*-------------------------------*/ 4109 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4110 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4111 4112 /* post the Isend of j-structure */ 4113 /*--------------------------------*/ 4114 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4115 4116 for (proc=0, k=0; proc<size; proc++) { 4117 if (!len_s[proc]) continue; 4118 i = owners[proc]; 4119 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4120 k++; 4121 } 4122 4123 /* receives and sends of j-structure are complete */ 4124 /*------------------------------------------------*/ 4125 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4126 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4127 4128 /* send and recv i-structure */ 4129 /*---------------------------*/ 4130 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4131 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4132 4133 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4134 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4135 for (proc=0,k=0; proc<size; proc++) { 4136 if (!len_s[proc]) continue; 4137 /* form outgoing message for i-structure: 4138 buf_si[0]: nrows to be sent 4139 [1:nrows]: row index (global) 4140 [nrows+1:2*nrows+1]: i-structure index 4141 */ 4142 /*-------------------------------------------*/ 4143 nrows = len_si[proc]/2 - 1; 4144 buf_si_i = buf_si + nrows+1; 4145 buf_si[0] = nrows; 4146 buf_si_i[0] = 0; 4147 nrows = 0; 4148 for (i=owners[proc]; i<owners[proc+1]; i++) { 4149 anzi = ai[i+1] - ai[i]; 4150 if (anzi) { 4151 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4152 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4153 nrows++; 4154 } 4155 } 4156 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4157 k++; 4158 buf_si += len_si[proc]; 4159 } 4160 4161 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4162 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4163 4164 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4165 for (i=0; i<merge->nrecv; i++) { 4166 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4167 } 4168 4169 ierr = PetscFree(len_si);CHKERRQ(ierr); 4170 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4171 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4172 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4173 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4174 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4175 ierr = PetscFree(status);CHKERRQ(ierr); 4176 4177 /* compute a local seq matrix in each processor */ 4178 
/*----------------------------------------------*/ 4179 /* allocate bi array and free space for accumulating nonzero column info */ 4180 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4181 bi[0] = 0; 4182 4183 /* create and initialize a linked list */ 4184 nlnk = N+1; 4185 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4186 4187 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4188 len = ai[owners[rank+1]] - ai[owners[rank]]; 4189 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4190 4191 current_space = free_space; 4192 4193 /* determine symbolic info for each local row */ 4194 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4195 4196 for (k=0; k<merge->nrecv; k++) { 4197 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4198 nrows = *buf_ri_k[k]; 4199 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4200 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4201 } 4202 4203 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4204 len = 0; 4205 for (i=0; i<m; i++) { 4206 bnzi = 0; 4207 /* add local non-zero cols of this proc's seqmat into lnk */ 4208 arow = owners[rank] + i; 4209 anzi = ai[arow+1] - ai[arow]; 4210 aj = a->j + ai[arow]; 4211 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4212 bnzi += nlnk; 4213 /* add received col data into lnk */ 4214 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4215 if (i == *nextrow[k]) { /* i-th row */ 4216 anzi = *(nextai[k]+1) - *nextai[k]; 4217 aj = buf_rj[k] + *nextai[k]; 4218 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4219 bnzi += nlnk; 4220 nextrow[k]++; nextai[k]++; 4221 } 4222 } 4223 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4224 4225 /* if free space is not available, make more free space */ 4226 if (current_space->local_remaining<bnzi) { 4227 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4228 nspacedouble++; 4229 } 4230 /* copy data into free space, then initialize lnk */ 4231 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4232 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4233 4234 current_space->array += bnzi; 4235 current_space->local_used += bnzi; 4236 current_space->local_remaining -= bnzi; 4237 4238 bi[i+1] = bi[i] + bnzi; 4239 } 4240 4241 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4242 4243 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4244 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4245 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4246 4247 /* create symbolic parallel matrix B_mpi */ 4248 /*---------------------------------------*/ 4249 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4250 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4251 if (n==PETSC_DECIDE) { 4252 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4253 } else { 4254 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4255 } 4256 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4257 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4258 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4259 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4260 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4261 4262 /* B_mpi is not ready
for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4263 B_mpi->assembled = PETSC_FALSE; 4264 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4265 merge->bi = bi; 4266 merge->bj = bj; 4267 merge->buf_ri = buf_ri; 4268 merge->buf_rj = buf_rj; 4269 merge->coi = NULL; 4270 merge->coj = NULL; 4271 merge->owners_co = NULL; 4272 4273 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4274 4275 /* attach the supporting struct to B_mpi for reuse */ 4276 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4277 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4278 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4279 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4280 *mpimat = B_mpi; 4281 4282 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4283 PetscFunctionReturn(0); 4284 } 4285 4286 #undef __FUNCT__ 4287 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4288 /*@C 4289 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4290 matrices from each processor 4291 4292 Collective on MPI_Comm 4293 4294 Input Parameters: 4295 + comm - the communicators the parallel matrix will live on 4296 . seqmat - the input sequential matrices 4297 . m - number of local rows (or PETSC_DECIDE) 4298 . n - number of local columns (or PETSC_DECIDE) 4299 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4300 4301 Output Parameter: 4302 . mpimat - the parallel matrix generated 4303 4304 Level: advanced 4305 4306 Notes: 4307 The dimensions of the sequential matrix in each processor MUST be the same. 4308 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4309 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4310 @*/ 4311 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4312 { 4313 PetscErrorCode ierr; 4314 PetscMPIInt size; 4315 4316 PetscFunctionBegin; 4317 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4318 if (size == 1) { 4319 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4320 if (scall == MAT_INITIAL_MATRIX) { 4321 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4322 } else { 4323 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4324 } 4325 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4326 PetscFunctionReturn(0); 4327 } 4328 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4329 if (scall == MAT_INITIAL_MATRIX) { 4330 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4331 } 4332 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4333 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4334 PetscFunctionReturn(0); 4335 } 4336 4337 #undef __FUNCT__ 4338 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4339 /*@ 4340 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4341 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4342 with MatGetSize() 4343 4344 Not Collective 4345 4346 Input Parameters: 4347 + A - the matrix 4348 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4349 4350 Output Parameter: 4351 . 
A_loc - the local sequential matrix generated 4352 4353 Level: developer 4354 4355 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4356 4357 @*/ 4358 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4359 { 4360 PetscErrorCode ierr; 4361 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4362 Mat_SeqAIJ *mat,*a,*b; 4363 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4364 MatScalar *aa,*ba,*cam; 4365 PetscScalar *ca; 4366 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4367 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4368 PetscBool match; 4369 MPI_Comm comm; 4370 PetscMPIInt size; 4371 4372 PetscFunctionBegin; 4373 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4374 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4375 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4376 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4377 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4378 4379 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4380 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4381 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4382 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4383 aa = a->a; ba = b->a; 4384 if (scall == MAT_INITIAL_MATRIX) { 4385 if (size == 1) { 4386 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4387 PetscFunctionReturn(0); 4388 } 4389 4390 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4391 ci[0] = 0; 4392 for (i=0; i<am; i++) { 4393 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4394 } 4395 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4396 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4397 k = 0; 4398 for (i=0; i<am; i++) { 4399 ncols_o = bi[i+1] - bi[i]; 4400 ncols_d = ai[i+1] - ai[i]; 4401 /* off-diagonal portion of A */ 4402 for (jo=0; jo<ncols_o; jo++) { 4403 col = cmap[*bj]; 4404 if (col >= cstart) break; 4405 cj[k] = col; bj++; 4406 ca[k++] = *ba++; 4407 } 4408 /* diagonal portion of A */ 4409 for (j=0; j<ncols_d; j++) { 4410 cj[k] = cstart + *aj++; 4411 ca[k++] = *aa++; 4412 } 4413 /* off-diagonal portion of A */ 4414 for (j=jo; j<ncols_o; j++) { 4415 cj[k] = cmap[*bj++]; 4416 ca[k++] = *ba++; 4417 } 4418 } 4419 /* put together the new matrix */ 4420 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4421 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4422 /* Since these are PETSc arrays, change flags to free them as necessary.
*/ 4423 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4424 mat->free_a = PETSC_TRUE; 4425 mat->free_ij = PETSC_TRUE; 4426 mat->nonew = 0; 4427 } else if (scall == MAT_REUSE_MATRIX) { 4428 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4429 ci = mat->i; cj = mat->j; cam = mat->a; 4430 for (i=0; i<am; i++) { 4431 /* off-diagonal portion of A */ 4432 ncols_o = bi[i+1] - bi[i]; 4433 for (jo=0; jo<ncols_o; jo++) { 4434 col = cmap[*bj]; 4435 if (col >= cstart) break; 4436 *cam++ = *ba++; bj++; 4437 } 4438 /* diagonal portion of A */ 4439 ncols_d = ai[i+1] - ai[i]; 4440 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4441 /* off-diagonal portion of A */ 4442 for (j=jo; j<ncols_o; j++) { 4443 *cam++ = *ba++; bj++; 4444 } 4445 } 4446 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4447 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4448 PetscFunctionReturn(0); 4449 } 4450 4451 #undef __FUNCT__ 4452 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4453 /*@C 4454 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4455 4456 Not Collective 4457 4458 Input Parameters: 4459 + A - the matrix 4460 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4461 - row, col - index sets of rows and columns to extract (or NULL) 4462 4463 Output Parameter: 4464 . A_loc - the local sequential matrix generated 4465 4466 Level: developer 4467 4468 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4469 4470 @*/ 4471 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4472 { 4473 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4474 PetscErrorCode ierr; 4475 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4476 IS isrowa,iscola; 4477 Mat *aloc; 4478 PetscBool match; 4479 4480 PetscFunctionBegin; 4481 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4482 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4483 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4484 if (!row) { 4485 start = A->rmap->rstart; end = A->rmap->rend; 4486 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4487 } else { 4488 isrowa = *row; 4489 } 4490 if (!col) { 4491 start = A->cmap->rstart; 4492 cmap = a->garray; 4493 nzA = a->A->cmap->n; 4494 nzB = a->B->cmap->n; 4495 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4496 ncols = 0; 4497 for (i=0; i<nzB; i++) { 4498 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4499 else break; 4500 } 4501 imark = i; 4502 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4503 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4504 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4505 } else { 4506 iscola = *col; 4507 } 4508 if (scall != MAT_INITIAL_MATRIX) { 4509 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4510 aloc[0] = *A_loc; 4511 } 4512 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4513 *A_loc = aloc[0]; 4514 ierr = PetscFree(aloc);CHKERRQ(ierr); 4515 if (!row) { 4516 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4517 } 4518 if (!col) { 4519 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4520 } 4521 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4522 PetscFunctionReturn(0); 4523 } 4524 4525 #undef __FUNCT__ 4526 #define __FUNCT__ "MatGetBrowsOfAcols" 4527 /*@C 4528 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that correspond to
nonzero columns of local A 4529 4530 Collective on Mat 4531 4532 Input Parameters: 4533 + A,B - the matrices in mpiaij format 4534 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4535 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4536 4537 Output Parameter: 4538 + rowb, colb - index sets of rows and columns of B to extract 4539 - B_seq - the sequential matrix generated 4540 4541 Level: developer 4542 4543 @*/ 4544 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4545 { 4546 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4547 PetscErrorCode ierr; 4548 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4549 IS isrowb,iscolb; 4550 Mat *bseq=NULL; 4551 4552 PetscFunctionBegin; 4553 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4554 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4555 } 4556 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4557 4558 if (scall == MAT_INITIAL_MATRIX) { 4559 start = A->cmap->rstart; 4560 cmap = a->garray; 4561 nzA = a->A->cmap->n; 4562 nzB = a->B->cmap->n; 4563 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4564 ncols = 0; 4565 for (i=0; i<nzB; i++) { /* row < local row index */ 4566 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4567 else break; 4568 } 4569 imark = i; 4570 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4571 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4572 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4573 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4574 } else { 4575 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4576 isrowb = *rowb; iscolb = *colb; 4577 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 4578 bseq[0] = *B_seq; 4579 } 4580 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4581 *B_seq = bseq[0]; 4582 ierr = PetscFree(bseq);CHKERRQ(ierr); 4583 if (!rowb) { 4584 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4585 } else { 4586 *rowb = isrowb; 4587 } 4588 if (!colb) { 4589 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4590 } else { 4591 *colb = iscolb; 4592 } 4593 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4594 PetscFunctionReturn(0); 4595 } 4596 4597 #undef __FUNCT__ 4598 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4599 /* 4600 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that correspond to nonzero columns 4601 of the OFF-DIAGONAL portion of local A 4602 4603 Collective on Mat 4604 4605 Input Parameters: 4606 + A,B - the matrices in mpiaij format 4607 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4608 4609 Output Parameter: 4610 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4611 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4612 .
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4613 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4614 4615 Level: developer 4616 4617 */ 4618 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4619 { 4620 VecScatter_MPI_General *gen_to,*gen_from; 4621 PetscErrorCode ierr; 4622 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4623 Mat_SeqAIJ *b_oth; 4624 VecScatter ctx =a->Mvctx; 4625 MPI_Comm comm; 4626 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4627 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4628 PetscScalar *rvalues,*svalues; 4629 MatScalar *b_otha,*bufa,*bufA; 4630 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4631 MPI_Request *rwaits = NULL,*swaits = NULL; 4632 MPI_Status *sstatus,rstatus; 4633 PetscMPIInt jj,size; 4634 PetscInt *cols,sbs,rbs; 4635 PetscScalar *vals; 4636 4637 PetscFunctionBegin; 4638 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4639 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4640 4641 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4642 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4643 } 4644 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4645 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4646 4647 gen_to = (VecScatter_MPI_General*)ctx->todata; 4648 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4649 rvalues = gen_from->values; /* holds the length of receiving row */ 4650 svalues = gen_to->values; /* holds the length of sending row */ 4651 nrecvs = gen_from->n; 4652 nsends = gen_to->n; 4653 4654 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4655 srow = gen_to->indices; /* local row index to be sent */ 4656 sstarts = gen_to->starts; 4657 sprocs = gen_to->procs; 4658 sstatus = gen_to->sstatus; 4659 sbs = gen_to->bs; 4660 rstarts = gen_from->starts; 4661 rprocs = gen_from->procs; 4662 rbs = gen_from->bs; 4663 4664 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4665 if (scall == MAT_INITIAL_MATRIX) { 4666 /* i-array */ 4667 /*---------*/ 4668 /* post receives */ 4669 for (i=0; i<nrecvs; i++) { 4670 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4671 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4672 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4673 } 4674 4675 /* pack the outgoing message */ 4676 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4677 4678 sstartsj[0] = 0; 4679 rstartsj[0] = 0; 4680 len = 0; /* total length of j or a array to be sent */ 4681 k = 0; 4682 for (i=0; i<nsends; i++) { 4683 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4684 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4685 for (j=0; j<nrows; j++) { 4686 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4687 for (l=0; l<sbs; l++) { 4688 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4689 4690 rowlen[j*sbs+l] = ncols; 4691 4692 len += ncols; 4693 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4694 } 4695 k++; 4696 } 4697 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4698 4699 sstartsj[i+1] = len; /* starting point of 
(i+1)-th outgoing msg in bufj and bufa */ 4700 } 4701 /* recvs and sends of i-array are completed */ 4702 i = nrecvs; 4703 while (i--) { 4704 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4705 } 4706 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4707 4708 /* allocate buffers for sending j and a arrays */ 4709 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 4710 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 4711 4712 /* create i-array of B_oth */ 4713 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 4714 4715 b_othi[0] = 0; 4716 len = 0; /* total length of j or a array to be received */ 4717 k = 0; 4718 for (i=0; i<nrecvs; i++) { 4719 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4720 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4721 for (j=0; j<nrows; j++) { 4722 b_othi[k+1] = b_othi[k] + rowlen[j]; 4723 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 4724 k++; 4725 } 4726 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4727 } 4728 4729 /* allocate space for j and a arrays of B_oth */ 4730 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 4731 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 4732 4733 /* j-array */ 4734 /*---------*/ 4735 /* post receives of j-array */ 4736 for (i=0; i<nrecvs; i++) { 4737 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4738 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4739 } 4740 4741 /* pack the outgoing message j-array */ 4742 k = 0; 4743 for (i=0; i<nsends; i++) { 4744 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4745 bufJ = bufj+sstartsj[i]; 4746 for (j=0; j<nrows; j++) { 4747 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4748 for (ll=0; ll<sbs; ll++) { 4749 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4750 for (l=0; l<ncols; l++) { 4751 *bufJ++ = cols[l]; 4752 } 4753 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4754 } 4755 } 4756 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4757 } 4758 4759 /* recvs and sends of j-array are completed */ 4760 i = nrecvs; 4761 while (i--) { 4762 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4763 } 4764 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4765 } else if (scall == MAT_REUSE_MATRIX) { 4766 sstartsj = *startsj_s; 4767 rstartsj = *startsj_r; 4768 bufa = *bufa_ptr; 4769 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4770 b_otha = b_oth->a; 4771 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Unsupported MatReuse value"); 4772 4773 /* a-array */ 4774 /*---------*/ 4775 /* post receives of a-array */ 4776 for (i=0; i<nrecvs; i++) { 4777 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4778 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4779 } 4780 4781 /* pack the outgoing message a-array */ 4782 k = 0; 4783 for (i=0; i<nsends; i++) { 4784 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4785 bufA = bufa+sstartsj[i]; 4786 for (j=0; j<nrows; j++) { 4787 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4788 for (ll=0; ll<sbs; ll++) { 4789 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4790 for (l=0; l<ncols; l++) { 4791 *bufA++ = vals[l]; 4792 } 4793 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4794 } 4795 } 4796 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4797 } 4798 /* recvs and sends of a-array are completed */ 4799 i = nrecvs; 4800 while (i--) { 4801 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4802 } 4803 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4804 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4805 4806 if (scall == MAT_INITIAL_MATRIX) { 4807 /* put together the new matrix */ 4808 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4809 4810 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4811 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4812 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4813 b_oth->free_a = PETSC_TRUE; 4814 b_oth->free_ij = PETSC_TRUE; 4815 b_oth->nonew = 0; 4816 4817 ierr = PetscFree(bufj);CHKERRQ(ierr); 4818 if (!startsj_s || !bufa_ptr) { 4819 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4820 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4821 } else { 4822 *startsj_s = sstartsj; 4823 *startsj_r = rstartsj; 4824 *bufa_ptr = bufa; 4825 } 4826 } 4827 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4828 PetscFunctionReturn(0); 4829 } 4830 4831 #undef __FUNCT__ 4832 #define __FUNCT__ "MatGetCommunicationStructs" 4833 /*@C 4834 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4835 4836 Not Collective 4837 4838 Input Parameters: 4839 . A - The matrix in mpiaij format 4840 4841 Output Parameter: 4842 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4843 . 
colmap - A map from global column index to local index into lvec 4844 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4845 4846 Level: developer 4847 4848 @*/ 4849 #if defined(PETSC_USE_CTABLE) 4850 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4851 #else 4852 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4853 #endif 4854 { 4855 Mat_MPIAIJ *a; 4856 4857 PetscFunctionBegin; 4858 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4859 PetscValidPointer(lvec, 2); 4860 PetscValidPointer(colmap, 3); 4861 PetscValidPointer(multScatter, 4); 4862 a = (Mat_MPIAIJ*) A->data; 4863 if (lvec) *lvec = a->lvec; 4864 if (colmap) *colmap = a->colmap; 4865 if (multScatter) *multScatter = a->Mvctx; 4866 PetscFunctionReturn(0); 4867 } 4868 4869 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4870 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4871 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4872 #if defined(PETSC_HAVE_ELEMENTAL) 4873 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4874 #endif 4875 4876 #undef __FUNCT__ 4877 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4878 /* 4879 Computes (B'*A')' since computing B*A directly is untenable 4880 4881 n p p 4882 ( ) ( ) ( ) 4883 m ( A ) * n ( B ) = m ( C ) 4884 ( ) ( ) ( ) 4885 4886 */ 4887 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4888 { 4889 PetscErrorCode ierr; 4890 Mat At,Bt,Ct; 4891 4892 PetscFunctionBegin; 4893 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4894 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4895 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4896 ierr = MatDestroy(&At);CHKERRQ(ierr); 4897 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4898 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4899 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4900 PetscFunctionReturn(0); 4901 } 4902 4903 #undef __FUNCT__ 4904 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4905 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4906 { 4907 PetscErrorCode ierr; 4908 PetscInt m=A->rmap->n,n=B->cmap->n; 4909 Mat Cmat; 4910 4911 PetscFunctionBegin; 4912 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4913 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 4914 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4915 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 4916 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4917 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 4918 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4919 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4920 4921 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 4922 4923 *C = Cmat; 4924 PetscFunctionReturn(0); 4925 } 4926 4927 /* ----------------------------------------------------------------*/ 4928 #undef __FUNCT__ 4929 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4930 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4931 { 4932 PetscErrorCode ierr; 4933 4934 PetscFunctionBegin; 4935 if (scall == MAT_INITIAL_MATRIX) { 4936 ierr = 
PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4937 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4938 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4939 } 4940 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4941 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4942 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4943 PetscFunctionReturn(0); 4944 } 4945 4946 /*MC 4947 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4948 4949 Options Database Keys: 4950 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4951 4952 Level: beginner 4953 4954 .seealso: MatCreateAIJ() 4955 M*/ 4956 4957 #undef __FUNCT__ 4958 #define __FUNCT__ "MatCreate_MPIAIJ" 4959 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 4960 { 4961 Mat_MPIAIJ *b; 4962 PetscErrorCode ierr; 4963 PetscMPIInt size; 4964 4965 PetscFunctionBegin; 4966 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 4967 4968 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 4969 B->data = (void*)b; 4970 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4971 B->assembled = PETSC_FALSE; 4972 B->insertmode = NOT_SET_VALUES; 4973 b->size = size; 4974 4975 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 4976 4977 /* build cache for off array entries formed */ 4978 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 4979 4980 b->donotstash = PETSC_FALSE; 4981 b->colmap = 0; 4982 b->garray = 0; 4983 b->roworiented = PETSC_TRUE; 4984 4985 /* stuff used for matrix vector multiply */ 4986 b->lvec = NULL; 4987 b->Mvctx = NULL; 4988 4989 /* stuff for MatGetRow() */ 4990 b->rowindices = 0; 4991 b->rowvalues = 0; 4992 b->getrowactive = PETSC_FALSE; 4993 4994 /* flexible pointer used in CUSP/CUSPARSE classes */ 4995 b->spptr = NULL; 4996 4997 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 4998 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 4999 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5000 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5001 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5002 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5003 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5004 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5005 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5006 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5007 #if defined(PETSC_HAVE_ELEMENTAL) 5008 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5009 #endif 5010 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5011 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5012 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5013 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5014 PetscFunctionReturn(0); 5015 } 5016 5017 #undef __FUNCT__ 5018 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5019 /*@C 5020 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5021 and "off-diagonal" part of the matrix in CSR format. 5022 5023 Collective on MPI_Comm 5024 5025 Input Parameters: 5026 + comm - MPI communicator 5027 . m - number of local rows (Cannot be PETSC_DECIDE) 5028 . n - This value should be the same as the local size used in creating the 5029 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5030 calculated if N is given) For square matrices n is almost always m. 5031 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5032 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5033 . i - row indices for "diagonal" portion of matrix 5034 . j - column indices 5035 . a - matrix values 5036 . oi - row indices for "off-diagonal" portion of matrix 5037 . oj - column indices 5038 - oa - matrix values 5039 5040 Output Parameter: 5041 . mat - the matrix 5042 5043 Level: advanced 5044 5045 Notes: 5046 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5047 must free the arrays once the matrix has been destroyed and not before. 5048 5049 The i and j indices are 0 based 5050 5051 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5052 5053 This sets local rows and cannot be used to set off-processor values. 5054 5055 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5056 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5057 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5058 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5059 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5060 communication if it is known that only local entries will be set. 
5061 5062 .keywords: matrix, aij, compressed row, sparse, parallel 5063 5064 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5065 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5066 @*/ 5067 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5068 { 5069 PetscErrorCode ierr; 5070 Mat_MPIAIJ *maij; 5071 5072 PetscFunctionBegin; 5073 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5074 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5075 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5076 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5077 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5078 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5079 maij = (Mat_MPIAIJ*) (*mat)->data; 5080 5081 (*mat)->preallocated = PETSC_TRUE; 5082 5083 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5084 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5085 5086 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5087 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5088 5089 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5090 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5091 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5092 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5093 5094 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5095 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5096 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5097 PetscFunctionReturn(0); 5098 } 5099 5100 /* 5101 Special version for direct calls from Fortran 5102 */ 5103 #include <petsc/private/fortranimpl.h> 5104 5105 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5106 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5107 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5108 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5109 #endif 5110 5111 /* Change these macros so can be used in void function */ 5112 #undef CHKERRQ 5113 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5114 #undef SETERRQ2 5115 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5116 #undef SETERRQ3 5117 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5118 #undef SETERRQ 5119 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5120 5121 #undef __FUNCT__ 5122 #define __FUNCT__ "matsetvaluesmpiaij_" 5123 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5124 { 5125 Mat mat = *mmat; 5126 PetscInt m = *mm, n = *mn; 5127 InsertMode addv = *maddv; 5128 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5129 PetscScalar value; 5130 PetscErrorCode ierr; 5131 5132 MatCheckPreallocated(mat,1); 5133 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5134 5135 #if defined(PETSC_USE_DEBUG) 5136 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5137 #endif 5138 { 5139 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5140 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5141 PetscBool roworiented = aij->roworiented; 5142 5143 /* Some Variables required in the macro */ 5144 Mat A = aij->A; 5145 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5146 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5147 MatScalar *aa = a->a; 5148 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5149 Mat B = aij->B; 5150 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5151 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5152 MatScalar *ba = b->a; 5153 5154 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5155 PetscInt nonew = a->nonew; 5156 MatScalar *ap1,*ap2; 5157 5158 PetscFunctionBegin; 5159 for (i=0; i<m; i++) { 5160 if (im[i] < 0) continue; 5161 #if defined(PETSC_USE_DEBUG) 5162 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5163 #endif 5164 if (im[i] >= rstart && im[i] < rend) { 5165 row = im[i] - rstart; 5166 lastcol1 = -1; 5167 rp1 = aj + ai[row]; 5168 ap1 = aa + ai[row]; 5169 rmax1 = aimax[row]; 5170 nrow1 = ailen[row]; 5171 low1 = 0; 5172 high1 = nrow1; 5173 lastcol2 = -1; 5174 rp2 = bj + bi[row]; 5175 ap2 = ba + bi[row]; 5176 rmax2 = bimax[row]; 5177 nrow2 = bilen[row]; 5178 low2 = 0; 5179 high2 = nrow2; 5180 5181 for (j=0; j<n; j++) { 5182 if (roworiented) value = v[i*n+j]; 5183 else value = v[i+j*m]; 5184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5185 if (in[j] >= cstart && in[j] < cend) { 5186 col = in[j] - cstart; 5187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5188 } else if (in[j] < 0) continue; 5189 #if defined(PETSC_USE_DEBUG) 5190 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5191 #endif 5192 else { 5193 if (mat->was_assembled) { 5194 if (!aij->colmap) { 5195 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5196 } 5197 #if defined(PETSC_USE_CTABLE) 5198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5199 col--; 5200 #else 5201 col = aij->colmap[in[j]] - 1; 5202 #endif 5203 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5204 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5205 col = in[j]; 5206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5207 B = aij->B; 5208 b = (Mat_SeqAIJ*)B->data; 5209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5210 rp2 = bj + bi[row]; 5211 ap2 = ba + bi[row]; 5212 rmax2 = bimax[row]; 5213 nrow2 = bilen[row]; 5214 low2 = 0; 5215 high2 = nrow2; 5216 bm = aij->B->rmap->n; 5217 ba = b->a; 5218 } 5219 } else col = in[j]; 5220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5221 } 5222 } 5223 } else if (!aij->donotstash) { 5224 if (roworiented) { 5225 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5226 } else { 5227 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5228 } 5229 } 5230 } 5231 } 5232 PetscFunctionReturnVoid(); 5233 } 5234 5235