#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also
   automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
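/*
   Example (an illustrative sketch, not part of the manual page above): creating an AIJ
   matrix and calling both preallocation routines, as recommended, so the same code runs
   unchanged on one process or on many. The communicator, sizes, and per-row nonzero
   estimates are assumptions chosen for illustration only.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);          <- used on one process
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   <- used on several
      ... insert entries with MatSetValues(), then ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/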
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*)aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
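/*
   Usage sketch (illustrative; A stands for any assembled matrix of this type and N for
   its global column count, both assumptions for the example). Because of the Allreduce
   above, every process receives the full array of N column norms:

      PetscReal *norms;
      ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
      ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
      ierr = PetscFree(norms);CHKERRQ(ierr);
*/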
#undef __FUNCT__
#define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ"
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore = 0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
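      /*
         Illustration (assumed values, not code): if this block of rows owns global
         columns [rstart,rend) = [4,8) and row i has column indices {1,5,9}, then
         ld[i] = 1 (one entry left of the diagonal block) and olens[i] = 2 (the entries
         with columns 1 and 9 fall outside [4,8)). dlens[i] still holds the full row
         length here; it is reduced to the diagonal count before preallocation below.
      */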
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
    number to the local number in the off-diagonal part of the local
    storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
    a slightly higher hash table cost; without it it is not scalable (each
    process has an order-N integer array, but access is fast).
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
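/*
   Illustration (assumed values): if the off-diagonal block B has garray = {3,7,9},
   i.e. its local columns 0,1,2 correspond to global columns 3,7,9, then the array
   form of the colmap built above stores colmap[3] = 1, colmap[7] = 2, colmap[9] = 3.
   The stored values are shifted by one so that a lookup of zero can mean "this global
   column is not present on this process".
*/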
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
b_noinsert: ; \
    bilen[row] = nrow2; \
  }
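/*
   How the two macros above work (a descriptive note, not new behavior): each row of the
   diagonal block A and the off-diagonal block B is kept sorted by column index. A macro
   first narrows the search window [low,high) by bisection while it holds more than five
   entries, then scans linearly. If the column is found, the value is added or overwritten;
   otherwise the later entries of the row are shifted up one slot (reallocating via
   MatSeqXAIJReallocateAIJ when the row is full) and the new column/value pair is inserted
   in sorted position.
*/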
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A      = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa    = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B      = aij->B;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba    = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  PetscInt       *lrows;
  PetscInt       r,len;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(mat->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
    PetscBool cong;
    ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
    if (cong) A->congruentlayouts = 1;
    else A->congruentlayouts = 0;
  }
  if ((diag != 0.0) && A->congruentlayouts) {
    ierr = MatZeroRows(mat->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj,*ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n,&lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N,&rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf,n,N,NULL,PETSC_OWN_POINTER,rrows,PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf,MPIU_INT,(PetscInt*)rows,lrows,MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,(PetscInt*)rows,lrows,MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
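/*
   Note on the multiply above (descriptive, not new behavior): the diagonal-block product
   y = A_d x is computed while the ghost values of x are still in flight between
   VecScatterBegin() and VecScatterEnd(); only afterwards is the off-diagonal contribution
   y += A_o x_ghost added. This overlaps communication with computation.
*/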
#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
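/*
   Note on the transpose multiply above (descriptive, not new behavior): the roles of
   the two blocks reverse relative to MatMult(). The off-diagonal product B^T x is
   computed first into the local work vector lvec, that result is sent back to its
   owning processes with a reverse scatter that ADDs into yy, and the diagonal product
   A^T x is computed locally while (in the unmerged case) that communication is pending.
*/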
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range = 0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs space as large as the largest process needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
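/*
   Note on the viewer logic above (descriptive, not new behavior): the
   PetscViewerFlowControl calls throttle the workers so that only a bounded number of
   messages is outstanding at a time; process 0 receives each worker's row lengths,
   column indices, and values in turn and writes them to the file, so the output is
   the same as if the whole matrix lived on one process.
*/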
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1416 if (!rank) { 1417 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1418 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1419 } 1420 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1421 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1422 ierr = MatDestroy(&A);CHKERRQ(ierr); 1423 } 1424 PetscFunctionReturn(0); 1425 } 1426 1427 #undef __FUNCT__ 1428 #define __FUNCT__ "MatView_MPIAIJ" 1429 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1430 { 1431 PetscErrorCode ierr; 1432 PetscBool iascii,isdraw,issocket,isbinary; 1433 1434 PetscFunctionBegin; 1435 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1436 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1437 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1438 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1439 if (iascii || isdraw || isbinary || issocket) { 1440 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1441 } 1442 PetscFunctionReturn(0); 1443 } 1444 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatSOR_MPIAIJ" 1447 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1448 { 1449 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1450 PetscErrorCode ierr; 1451 Vec bb1 = 0; 1452 PetscBool hasop; 1453 1454 PetscFunctionBegin; 1455 if (flag == SOR_APPLY_UPPER) { 1456 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1457 PetscFunctionReturn(0); 1458 } 1459 1460 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1461 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1462 } 1463 1464 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1465 if (flag & SOR_ZERO_INITIAL_GUESS) { 1466 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1467 its--; 1468 } 1469 1470 while (its--) { 1471 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1472 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1473 1474 /* update rhs: bb1 = bb - B*x */ 1475 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1476 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1477 1478 /* local sweep */ 1479 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1480 } 1481 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1482 if (flag & SOR_ZERO_INITIAL_GUESS) { 1483 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1484 its--; 1485 } 1486 while (its--) { 1487 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1488 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1489 1490 /* update rhs: bb1 = bb - B*x */ 1491 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1492 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1493 1494 /* local sweep */ 1495 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1496 } 1497 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1498 if (flag & SOR_ZERO_INITIAL_GUESS) { 1499 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1500 its--; 1501 } 1502 while (its--) { 1503 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1504 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1505 1506 /* update rhs: bb1 = bb - B*x */ 1507 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1508 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1509 1510 /* local sweep */ 1511 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1512 } 1513 } else if (flag & SOR_EISENSTAT) { 1514 Vec xx1; 1515 1516 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1517 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1518 1519 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1520 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1521 if (!mat->diag) { 1522 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1523 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1524 } 1525 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1526 if (hasop) { 1527 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1528 } else { 1529 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1530 } 1531 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1532 1533 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1534 1535 /* local sweep */ 1536 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1537 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1538 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1539 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1540 1541 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1542 1543 matin->factorerrortype = mat->A->factorerrortype; 1544 PetscFunctionReturn(0); 1545 } 1546 1547 #undef __FUNCT__ 1548 #define __FUNCT__ "MatPermute_MPIAIJ" 1549 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1550 { 1551 Mat aA,aB,Aperm; 1552 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1553 PetscScalar *aa,*ba; 1554 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1555 PetscSF rowsf,sf; 1556 IS parcolp = NULL; 1557 PetscBool done; 1558 PetscErrorCode ierr; 1559 1560 PetscFunctionBegin; 1561 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1562 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1563 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1564 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1565 1566 /* Invert row permutation to find out where my rows should go */ 1567 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1568 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1569 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1570 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1571 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1572 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1573 1574 /* Invert column permutation to find out where my columns should go */ 1575 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1576 ierr = 
PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1577 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1578 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1579 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1580 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1581 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1582 1583 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1584 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1585 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1586 1587 /* Find out where my gcols should go */ 1588 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1589 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1590 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1591 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1592 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1593 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1594 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1595 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1596 1597 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1598 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1599 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1600 for (i=0; i<m; i++) { 1601 PetscInt row = rdest[i],rowner; 1602 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1603 for (j=ai[i]; j<ai[i+1]; j++) { 1604 PetscInt cowner,col = cdest[aj[j]]; 1605 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1606 if (rowner == cowner) dnnz[i]++; 1607 else onnz[i]++; 1608 } 1609 for (j=bi[i]; j<bi[i+1]; j++) { 1610 PetscInt cowner,col = gcdest[bj[j]]; 1611 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1612 if (rowner == cowner) dnnz[i]++; 1613 else onnz[i]++; 1614 } 1615 } 1616 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1617 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1618 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1619 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1620 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1621 1622 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1623 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1624 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1625 for (i=0; i<m; i++) { 1626 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1627 PetscInt j0,rowlen; 1628 rowlen = ai[i+1] - ai[i]; 1629 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1630 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1631 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1632 } 1633 rowlen = bi[i+1] - bi[i]; 1634 for (j0=j=0; j<rowlen; j0=j) { 1635 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1636 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1637 } 1638 } 1639 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1640 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1641 ierr = 
MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1642 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1643 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1644 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1645 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1646 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1647 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1648 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1649 *B = Aperm; 1650 PetscFunctionReturn(0); 1651 } 1652 1653 #undef __FUNCT__ 1654 #define __FUNCT__ "MatGetGhosts_MPIAIJ" 1655 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1656 { 1657 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1658 PetscErrorCode ierr; 1659 1660 PetscFunctionBegin; 1661 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1662 if (ghosts) *ghosts = aij->garray; 1663 PetscFunctionReturn(0); 1664 } 1665 1666 #undef __FUNCT__ 1667 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1668 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1669 { 1670 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1671 Mat A = mat->A,B = mat->B; 1672 PetscErrorCode ierr; 1673 PetscReal isend[5],irecv[5]; 1674 1675 PetscFunctionBegin; 1676 info->block_size = 1.0; 1677 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1678 1679 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1680 isend[3] = info->memory; isend[4] = info->mallocs; 1681 1682 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1683 1684 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1685 isend[3] += info->memory; isend[4] += info->mallocs; 1686 if (flag == MAT_LOCAL) { 1687 info->nz_used = isend[0]; 1688 info->nz_allocated = isend[1]; 1689 info->nz_unneeded = isend[2]; 1690 info->memory = isend[3]; 1691 info->mallocs = isend[4]; 1692 } else if (flag == MAT_GLOBAL_MAX) { 1693 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1694 1695 info->nz_used = irecv[0]; 1696 info->nz_allocated = irecv[1]; 1697 info->nz_unneeded = irecv[2]; 1698 info->memory = irecv[3]; 1699 info->mallocs = irecv[4]; 1700 } else if (flag == MAT_GLOBAL_SUM) { 1701 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1702 1703 info->nz_used = irecv[0]; 1704 info->nz_allocated = irecv[1]; 1705 info->nz_unneeded = irecv[2]; 1706 info->memory = irecv[3]; 1707 info->mallocs = irecv[4]; 1708 } 1709 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1710 info->fill_ratio_needed = 0; 1711 info->factor_mallocs = 0; 1712 PetscFunctionReturn(0); 1713 } 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "MatSetOption_MPIAIJ" 1717 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1718 { 1719 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1720 PetscErrorCode ierr; 1721 1722 PetscFunctionBegin; 1723 switch (op) { 1724 case MAT_NEW_NONZERO_LOCATIONS: 1725 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1726 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1727 case MAT_KEEP_NONZERO_PATTERN: 1728 case MAT_NEW_NONZERO_LOCATION_ERR: 1729 case MAT_USE_INODES: 1730 case MAT_IGNORE_ZERO_ENTRIES: 1731 MatCheckPreallocated(A,1); 1732 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1733 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1734 break; 1735 case MAT_ROW_ORIENTED: 1736 MatCheckPreallocated(A,1); 1737 a->roworiented = flg; 1738 1739 ierr = 
MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1740 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1741 break; 1742 case MAT_NEW_DIAGONALS: 1743 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1744 break; 1745 case MAT_IGNORE_OFF_PROC_ENTRIES: 1746 a->donotstash = flg; 1747 break; 1748 case MAT_SPD: 1749 A->spd_set = PETSC_TRUE; 1750 A->spd = flg; 1751 if (flg) { 1752 A->symmetric = PETSC_TRUE; 1753 A->structurally_symmetric = PETSC_TRUE; 1754 A->symmetric_set = PETSC_TRUE; 1755 A->structurally_symmetric_set = PETSC_TRUE; 1756 } 1757 break; 1758 case MAT_SYMMETRIC: 1759 MatCheckPreallocated(A,1); 1760 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1761 break; 1762 case MAT_STRUCTURALLY_SYMMETRIC: 1763 MatCheckPreallocated(A,1); 1764 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1765 break; 1766 case MAT_HERMITIAN: 1767 MatCheckPreallocated(A,1); 1768 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1769 break; 1770 case MAT_SYMMETRY_ETERNAL: 1771 MatCheckPreallocated(A,1); 1772 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1773 break; 1774 default: 1775 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1776 } 1777 PetscFunctionReturn(0); 1778 } 1779 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "MatGetRow_MPIAIJ" 1782 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1783 { 1784 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1785 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1786 PetscErrorCode ierr; 1787 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1788 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1789 PetscInt *cmap,*idx_p; 1790 1791 PetscFunctionBegin; 1792 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1793 mat->getrowactive = PETSC_TRUE; 1794 1795 if (!mat->rowvalues && (idx || v)) { 1796 /* 1797 allocate enough space to hold information from the longest row. 
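        A row of the parallel matrix is the concatenation of a row of the diagonal
        block A and a row of the off-diagonal block B, so the workspace must hold the
        widest combined row; the loop below computes that width.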
1798 */ 1799 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1800 PetscInt max = 1,tmp; 1801 for (i=0; i<matin->rmap->n; i++) { 1802 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1803 if (max < tmp) max = tmp; 1804 } 1805 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1806 } 1807 1808 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1809 lrow = row - rstart; 1810 1811 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1812 if (!v) {pvA = 0; pvB = 0;} 1813 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1814 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1815 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1816 nztot = nzA + nzB; 1817 1818 cmap = mat->garray; 1819 if (v || idx) { 1820 if (nztot) { 1821 /* Sort by increasing column numbers, assuming A and B already sorted */ 1822 PetscInt imark = -1; 1823 if (v) { 1824 *v = v_p = mat->rowvalues; 1825 for (i=0; i<nzB; i++) { 1826 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1827 else break; 1828 } 1829 imark = i; 1830 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1831 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1832 } 1833 if (idx) { 1834 *idx = idx_p = mat->rowindices; 1835 if (imark > -1) { 1836 for (i=0; i<imark; i++) { 1837 idx_p[i] = cmap[cworkB[i]]; 1838 } 1839 } else { 1840 for (i=0; i<nzB; i++) { 1841 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1842 else break; 1843 } 1844 imark = i; 1845 } 1846 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1847 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1848 } 1849 } else { 1850 if (idx) *idx = 0; 1851 if (v) *v = 0; 1852 } 1853 } 1854 *nz = nztot; 1855 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1856 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1857 PetscFunctionReturn(0); 1858 } 1859 1860 #undef __FUNCT__ 1861 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1862 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1863 { 1864 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1865 1866 PetscFunctionBegin; 1867 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1868 aij->getrowactive = PETSC_FALSE; 1869 PetscFunctionReturn(0); 1870 } 1871 1872 #undef __FUNCT__ 1873 #define __FUNCT__ "MatNorm_MPIAIJ" 1874 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1875 { 1876 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1877 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1878 PetscErrorCode ierr; 1879 PetscInt i,j,cstart = mat->cmap->rstart; 1880 PetscReal sum = 0.0; 1881 MatScalar *v; 1882 1883 PetscFunctionBegin; 1884 if (aij->size == 1) { 1885 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1886 } else { 1887 if (type == NORM_FROBENIUS) { 1888 v = amat->a; 1889 for (i=0; i<amat->nz; i++) { 1890 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1891 } 1892 v = bmat->a; 1893 for (i=0; i<bmat->nz; i++) { 1894 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1895 } 1896 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1897 *norm = PetscSqrtReal(*norm); 1898 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1899 } else if (type == NORM_1) { /* max column norm */ 1900 PetscReal *tmp,*tmp2; 1901 PetscInt *jj,*garray = aij->garray; 1902 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1903 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1904 *norm = 0.0; 1905 v = amat->a; jj = amat->j; 1906 for (j=0; j<amat->nz; j++) { 1907 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1908 } 1909 v = bmat->a; jj = bmat->j; 1910 for (j=0; j<bmat->nz; j++) { 1911 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1912 } 1913 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1914 for (j=0; j<mat->cmap->N; j++) { 1915 if (tmp2[j] > *norm) *norm = tmp2[j]; 1916 } 1917 ierr = PetscFree(tmp);CHKERRQ(ierr); 1918 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1919 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1920 } else if (type == NORM_INFINITY) { /* max row norm */ 1921 PetscReal ntemp = 0.0; 1922 for (j=0; j<aij->A->rmap->n; j++) { 1923 v = amat->a + amat->i[j]; 1924 sum = 0.0; 1925 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1926 sum += PetscAbsScalar(*v); v++; 1927 } 1928 v = bmat->a + bmat->i[j]; 1929 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1930 sum += PetscAbsScalar(*v); v++; 1931 } 1932 if (sum > ntemp) ntemp = sum; 1933 } 1934 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1935 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1936 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1937 } 1938 PetscFunctionReturn(0); 1939 } 1940 1941 #undef __FUNCT__ 1942 #define __FUNCT__ "MatTranspose_MPIAIJ" 1943 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1944 { 1945 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1946 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1947 PetscErrorCode ierr; 1948 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1949 PetscInt cstart = A->cmap->rstart,ncol; 1950 Mat B; 1951 MatScalar *array; 1952 1953 PetscFunctionBegin; 1954 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1955 1956 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1957 ai = Aloc->i; aj = Aloc->j; 1958 bi = Bloc->i; bj = Bloc->j; 1959 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1960 PetscInt *d_nnz,*g_nnz,*o_nnz; 1961 PetscSFNode *oloc; 1962 PETSC_UNUSED PetscSF sf; 1963 1964 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1965 /* compute d_nnz for preallocation */ 1966 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1967 for (i=0; i<ai[ma]; i++) { 1968 d_nnz[aj[i]]++; 1969 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1970 } 1971 /* compute local off-diagonal contributions */ 1972 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1973 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1974 /* map those to global */ 1975 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1976 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1977 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1978 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1979 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1980 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1981 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1982 1983 ierr = 
MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1984 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1985 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1986 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1987 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1988 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1989 } else { 1990 B = *matout; 1991 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1992 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1993 } 1994 1995 /* copy over the A part */ 1996 array = Aloc->a; 1997 row = A->rmap->rstart; 1998 for (i=0; i<ma; i++) { 1999 ncol = ai[i+1]-ai[i]; 2000 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2001 row++; 2002 array += ncol; aj += ncol; 2003 } 2004 aj = Aloc->j; 2005 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2006 2007 /* copy over the B part */ 2008 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2009 array = Bloc->a; 2010 row = A->rmap->rstart; 2011 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2012 cols_tmp = cols; 2013 for (i=0; i<mb; i++) { 2014 ncol = bi[i+1]-bi[i]; 2015 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2016 row++; 2017 array += ncol; cols_tmp += ncol; 2018 } 2019 ierr = PetscFree(cols);CHKERRQ(ierr); 2020 2021 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2022 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2023 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2024 *matout = B; 2025 } else { 2026 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2027 } 2028 PetscFunctionReturn(0); 2029 } 2030 2031 #undef __FUNCT__ 2032 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2033 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2034 { 2035 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2036 Mat a = aij->A,b = aij->B; 2037 PetscErrorCode ierr; 2038 PetscInt s1,s2,s3; 2039 2040 PetscFunctionBegin; 2041 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2042 if (rr) { 2043 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2044 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2045 /* Overlap communication with computation. 
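       The forward scatter of rr into aij->lvec is only started here; it is completed
       after the diagonal block has been scaled, so the communication proceeds while
       the local computation runs.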
*/ 2046 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2047 } 2048 if (ll) { 2049 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2050 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2051 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2052 } 2053 /* scale the diagonal block */ 2054 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2055 2056 if (rr) { 2057 /* Do a scatter end and then right scale the off-diagonal block */ 2058 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2059 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2060 } 2061 PetscFunctionReturn(0); 2062 } 2063 2064 #undef __FUNCT__ 2065 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2066 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2067 { 2068 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2069 PetscErrorCode ierr; 2070 2071 PetscFunctionBegin; 2072 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatEqual_MPIAIJ" 2078 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2079 { 2080 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2081 Mat a,b,c,d; 2082 PetscBool flg; 2083 PetscErrorCode ierr; 2084 2085 PetscFunctionBegin; 2086 a = matA->A; b = matA->B; 2087 c = matB->A; d = matB->B; 2088 2089 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2090 if (flg) { 2091 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2092 } 2093 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2094 PetscFunctionReturn(0); 2095 } 2096 2097 #undef __FUNCT__ 2098 #define __FUNCT__ "MatCopy_MPIAIJ" 2099 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2100 { 2101 PetscErrorCode ierr; 2102 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2103 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2104 2105 PetscFunctionBegin; 2106 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2107 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2108 /* because of the column compression in the off-processor part of the matrix a->B, 2109 the number of columns in a->B and b->B may be different, hence we cannot call 2110 the MatCopy() directly on the two parts. If need be, we can provide a more 2111 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2112 then copying the submatrices */ 2113 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2114 } else { 2115 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2116 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2117 } 2118 PetscFunctionReturn(0); 2119 } 2120 2121 #undef __FUNCT__ 2122 #define __FUNCT__ "MatSetUp_MPIAIJ" 2123 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2124 { 2125 PetscErrorCode ierr; 2126 2127 PetscFunctionBegin; 2128 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2129 PetscFunctionReturn(0); 2130 } 2131 2132 /* 2133 Computes the number of nonzeros per row needed for preallocation when X and Y 2134 have different nonzero structure. 
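  For example (illustrative): if row i of X has global columns {0,3,7} and row i of Y
  has global columns {3,5}, the merged pattern is {0,3,5,7} and nnz[i] = 4; the loop
  below counts each X column once, skips the duplicate 3, and then sweeps up the
  remaining Y columns.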
2135 */ 2136 #undef __FUNCT__ 2137 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2138 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2139 { 2140 PetscInt i,j,k,nzx,nzy; 2141 2142 PetscFunctionBegin; 2143 /* Set the number of nonzeros in the new matrix */ 2144 for (i=0; i<m; i++) { 2145 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2146 nzx = xi[i+1] - xi[i]; 2147 nzy = yi[i+1] - yi[i]; 2148 nnz[i] = 0; 2149 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2150 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2151 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2152 nnz[i]++; 2153 } 2154 for (; k<nzy; k++) nnz[i]++; 2155 } 2156 PetscFunctionReturn(0); 2157 } 2158 2159 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2160 #undef __FUNCT__ 2161 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2162 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2163 { 2164 PetscErrorCode ierr; 2165 PetscInt m = Y->rmap->N; 2166 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2167 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2168 2169 PetscFunctionBegin; 2170 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2171 PetscFunctionReturn(0); 2172 } 2173 2174 #undef __FUNCT__ 2175 #define __FUNCT__ "MatAXPY_MPIAIJ" 2176 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2177 { 2178 PetscErrorCode ierr; 2179 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2180 PetscBLASInt bnz,one=1; 2181 Mat_SeqAIJ *x,*y; 2182 2183 PetscFunctionBegin; 2184 if (str == SAME_NONZERO_PATTERN) { 2185 PetscScalar alpha = a; 2186 x = (Mat_SeqAIJ*)xx->A->data; 2187 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2188 y = (Mat_SeqAIJ*)yy->A->data; 2189 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2190 x = (Mat_SeqAIJ*)xx->B->data; 2191 y = (Mat_SeqAIJ*)yy->B->data; 2192 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2193 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2194 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2195 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2196 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2197 } else { 2198 Mat B; 2199 PetscInt *nnz_d,*nnz_o; 2200 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2201 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2202 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2203 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2204 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2205 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2206 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2207 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2208 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2209 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2210 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2211 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2212 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2213 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2214 } 2215 
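  /*
     A usage sketch (hypothetical caller, not part of this file): computing Y <- Y + 2*X via

         ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

     takes the BLASaxpy() fast path above, while DIFFERENT_NONZERO_PATTERN falls through
     to the preallocate-and-rebuild branch.
  */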
PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
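/*
   A minimal usage sketch (hypothetical; not called anywhere in this file), assuming an
   assembled MATMPIAIJ matrix: gather each local row's largest-magnitude entry together
   with the global column index where it occurs.
*/
PETSC_UNUSED static PetscErrorCode MatGetRowMaxAbsExample(Mat A)
{
  PetscErrorCode ierr;
  Vec            vmax;
  PetscInt       m,*cols;

  PetscFunctionBegin;
  ierr = MatCreateVecs(A,NULL,&vmax);CHKERRQ(ierr);   /* left vector: laid out like A's rows */
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&cols);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(A,vmax,cols);CHKERRQ(ierr);  /* cols[i] is a global column index */
  ierr = PetscFree(cols);CHKERRQ(ierr);
  ierr = VecDestroy(&vmax);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}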
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIAIJ"
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ"
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap"
/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetFromOptions_MPIAIJ"
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscErrorCode ierr;
  PetscBool      sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatShift_MPIAIJ"
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if
(!aij->nz) { 2531 PetscInt nonew = aij->nonew; 2532 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2533 aij->nonew = nonew; 2534 } 2535 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2536 PetscFunctionReturn(0); 2537 } 2538 2539 #undef __FUNCT__ 2540 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ" 2541 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2542 { 2543 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2544 PetscErrorCode ierr; 2545 2546 PetscFunctionBegin; 2547 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2548 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2549 if (d) { 2550 PetscInt rstart; 2551 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2552 *d += rstart; 2553 2554 } 2555 PetscFunctionReturn(0); 2556 } 2557 2558 2559 /* -------------------------------------------------------------------*/ 2560 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2561 MatGetRow_MPIAIJ, 2562 MatRestoreRow_MPIAIJ, 2563 MatMult_MPIAIJ, 2564 /* 4*/ MatMultAdd_MPIAIJ, 2565 MatMultTranspose_MPIAIJ, 2566 MatMultTransposeAdd_MPIAIJ, 2567 0, 2568 0, 2569 0, 2570 /*10*/ 0, 2571 0, 2572 0, 2573 MatSOR_MPIAIJ, 2574 MatTranspose_MPIAIJ, 2575 /*15*/ MatGetInfo_MPIAIJ, 2576 MatEqual_MPIAIJ, 2577 MatGetDiagonal_MPIAIJ, 2578 MatDiagonalScale_MPIAIJ, 2579 MatNorm_MPIAIJ, 2580 /*20*/ MatAssemblyBegin_MPIAIJ, 2581 MatAssemblyEnd_MPIAIJ, 2582 MatSetOption_MPIAIJ, 2583 MatZeroEntries_MPIAIJ, 2584 /*24*/ MatZeroRows_MPIAIJ, 2585 0, 2586 0, 2587 0, 2588 0, 2589 /*29*/ MatSetUp_MPIAIJ, 2590 0, 2591 0, 2592 MatGetDiagonalBlock_MPIAIJ, 2593 0, 2594 /*34*/ MatDuplicate_MPIAIJ, 2595 0, 2596 0, 2597 0, 2598 0, 2599 /*39*/ MatAXPY_MPIAIJ, 2600 MatGetSubMatrices_MPIAIJ, 2601 MatIncreaseOverlap_MPIAIJ, 2602 MatGetValues_MPIAIJ, 2603 MatCopy_MPIAIJ, 2604 /*44*/ MatGetRowMax_MPIAIJ, 2605 MatScale_MPIAIJ, 2606 MatShift_MPIAIJ, 2607 MatDiagonalSet_MPIAIJ, 2608 MatZeroRowsColumns_MPIAIJ, 2609 /*49*/ MatSetRandom_MPIAIJ, 2610 0, 2611 0, 2612 0, 2613 0, 2614 /*54*/ MatFDColoringCreate_MPIXAIJ, 2615 0, 2616 MatSetUnfactored_MPIAIJ, 2617 MatPermute_MPIAIJ, 2618 0, 2619 /*59*/ MatGetSubMatrix_MPIAIJ, 2620 MatDestroy_MPIAIJ, 2621 MatView_MPIAIJ, 2622 0, 2623 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2624 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2625 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2626 0, 2627 0, 2628 0, 2629 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2630 MatGetRowMinAbs_MPIAIJ, 2631 0, 2632 0, 2633 0, 2634 0, 2635 /*75*/ MatFDColoringApply_AIJ, 2636 MatSetFromOptions_MPIAIJ, 2637 0, 2638 0, 2639 MatFindZeroDiagonals_MPIAIJ, 2640 /*80*/ 0, 2641 0, 2642 0, 2643 /*83*/ MatLoad_MPIAIJ, 2644 0, 2645 0, 2646 0, 2647 0, 2648 0, 2649 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2650 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2651 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2652 MatPtAP_MPIAIJ_MPIAIJ, 2653 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2654 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2655 0, 2656 0, 2657 0, 2658 0, 2659 /*99*/ 0, 2660 0, 2661 0, 2662 MatConjugate_MPIAIJ, 2663 0, 2664 /*104*/MatSetValuesRow_MPIAIJ, 2665 MatRealPart_MPIAIJ, 2666 MatImaginaryPart_MPIAIJ, 2667 0, 2668 0, 2669 /*109*/0, 2670 0, 2671 MatGetRowMin_MPIAIJ, 2672 0, 2673 MatMissingDiagonal_MPIAIJ, 2674 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2675 0, 2676 MatGetGhosts_MPIAIJ, 2677 0, 2678 0, 2679 /*119*/0, 2680 0, 2681 0, 2682 0, 2683 MatGetMultiProcBlock_MPIAIJ, 2684 /*124*/MatFindNonzeroRows_MPIAIJ, 2685 MatGetColumnNorms_MPIAIJ, 2686 MatInvertBlockDiagonal_MPIAIJ, 2687 0, 2688 
MatGetSubMatricesMPI_MPIAIJ, 2689 /*129*/0, 2690 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2691 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2692 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2693 0, 2694 /*134*/0, 2695 0, 2696 0, 2697 0, 2698 0, 2699 /*139*/0, 2700 0, 2701 0, 2702 MatFDColoringSetUp_MPIXAIJ, 2703 MatFindOffBlockDiagonalEntries_MPIAIJ, 2704 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2705 }; 2706 2707 /* ----------------------------------------------------------------------------------------*/ 2708 2709 #undef __FUNCT__ 2710 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2711 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2712 { 2713 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2714 PetscErrorCode ierr; 2715 2716 PetscFunctionBegin; 2717 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2718 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2719 PetscFunctionReturn(0); 2720 } 2721 2722 #undef __FUNCT__ 2723 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2724 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2725 { 2726 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2727 PetscErrorCode ierr; 2728 2729 PetscFunctionBegin; 2730 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2731 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2732 PetscFunctionReturn(0); 2733 } 2734 2735 #undef __FUNCT__ 2736 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2737 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2738 { 2739 Mat_MPIAIJ *b; 2740 PetscErrorCode ierr; 2741 2742 PetscFunctionBegin; 2743 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2744 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2745 b = (Mat_MPIAIJ*)B->data; 2746 2747 if (!B->preallocated) { 2748 /* Explicitly create 2 MATSEQAIJ matrices. 
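       b->A is the square "diagonal" block coupling the rows this process owns to the
       columns it owns; b->B holds the coupling to all off-process columns. b->B is
       created with the full global column width here, and during assembly its empty
       columns are compressed out, with garray recording the resulting local-to-global
       column map.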
*/ 2749 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2750 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2751 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2752 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2753 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2754 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2755 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2756 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2757 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2758 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2759 } 2760 2761 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2762 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2763 B->preallocated = PETSC_TRUE; 2764 PetscFunctionReturn(0); 2765 } 2766 2767 #undef __FUNCT__ 2768 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2769 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2770 { 2771 Mat mat; 2772 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2773 PetscErrorCode ierr; 2774 2775 PetscFunctionBegin; 2776 *newmat = 0; 2777 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2778 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2779 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2780 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2781 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2782 a = (Mat_MPIAIJ*)mat->data; 2783 2784 mat->factortype = matin->factortype; 2785 mat->assembled = PETSC_TRUE; 2786 mat->insertmode = NOT_SET_VALUES; 2787 mat->preallocated = PETSC_TRUE; 2788 2789 a->size = oldmat->size; 2790 a->rank = oldmat->rank; 2791 a->donotstash = oldmat->donotstash; 2792 a->roworiented = oldmat->roworiented; 2793 a->rowindices = 0; 2794 a->rowvalues = 0; 2795 a->getrowactive = PETSC_FALSE; 2796 2797 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2798 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2799 2800 if (oldmat->colmap) { 2801 #if defined(PETSC_USE_CTABLE) 2802 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2803 #else 2804 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2805 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2806 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2807 #endif 2808 } else a->colmap = 0; 2809 if (oldmat->garray) { 2810 PetscInt len; 2811 len = oldmat->B->cmap->n; 2812 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2813 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2814 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2815 } else a->garray = 0; 2816 2817 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2818 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2819 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2820 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2821 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2822 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2823 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2824 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}



#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;
  PetscInt       bs = newMat->rmap->bs;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
  if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%d) and block size (%d)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = -1; /* unused, but compilers complain */

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];
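  /*
     For reference, the binary format parsed below (see the MatLoad() documentation):

         MAT_FILE_CLASSID
         M                    number of rows
         N                    number of columns
         nz                   total number of nonzeros (-1 denotes a special format)
         nnz[0] ... nnz[M-1]  number of nonzeros in each row
         all column indices, row by row
         all numerical values, row by row

     Process 0 reads each block and ships the pieces to their owning processes.
  */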
  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr =
MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3007 } 3008 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3009 } else { 3010 /* receive numeric values */ 3011 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3012 3013 /* receive message of values*/ 3014 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3015 3016 /* insert into matrix */ 3017 jj = rstart; 3018 smycols = mycols; 3019 svals = vals; 3020 for (i=0; i<m; i++) { 3021 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3022 smycols += ourlens[i]; 3023 svals += ourlens[i]; 3024 jj++; 3025 } 3026 } 3027 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3028 ierr = PetscFree(vals);CHKERRQ(ierr); 3029 ierr = PetscFree(mycols);CHKERRQ(ierr); 3030 ierr = PetscFree(rowners);CHKERRQ(ierr); 3031 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3032 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3033 PetscFunctionReturn(0); 3034 } 3035 3036 #undef __FUNCT__ 3037 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3038 /* TODO: Not scalable because of ISAllGather() unless getting all columns. */ 3039 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3040 { 3041 PetscErrorCode ierr; 3042 IS iscol_local; 3043 PetscInt csize; 3044 3045 PetscFunctionBegin; 3046 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3047 if (call == MAT_REUSE_MATRIX) { 3048 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3049 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3050 } else { 3051 /* check if we are grabbing all columns*/ 3052 PetscBool isstride; 3053 PetscMPIInt lisstride = 0,gisstride; 3054 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3055 if (isstride) { 3056 PetscInt start,len,mstart,mlen; 3057 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3058 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3059 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3060 if (mstart == start && mlen-mstart == len) lisstride = 1; 3061 } 3062 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3063 if (gisstride) { 3064 PetscInt N; 3065 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3066 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3067 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3068 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3069 } else { 3070 PetscInt cbs; 3071 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3072 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3073 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3074 } 3075 } 3076 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3077 if (call == MAT_INITIAL_MATRIX) { 3078 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3079 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3080 } 3081 PetscFunctionReturn(0); 3082 } 3083 3084 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3085 #undef __FUNCT__ 3086 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3087 /* 3088 Not great since it makes two copies of 
the submatrix: first a sequential (SeqAIJ) copy on each process, and then
   the final result obtained by concatenating the local matrices.
   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

   Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
  PetscBool      allcolumns, colflag;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
  if (colflag && ncol == mat->cmap->N) {
    allcolumns = PETSC_TRUE;
    ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr);
  } else {
    allcolumns = PETSC_FALSE;
  }
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
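    /* dlens/olens hold the exact per-row diagonal/off-diagonal counts computed above;
       once MatMPIAIJSetPreallocation() has consumed them they can be freed
       (olens points into the same allocation as dlens) */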
3179 ierr = PetscFree(dlens);CHKERRQ(ierr); 3180 } else { 3181 PetscInt ml,nl; 3182 3183 M = *newmat; 3184 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3185 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3186 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3187 /* 3188 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3189 rather than the slower MatSetValues(). 3190 */ 3191 M->was_assembled = PETSC_TRUE; 3192 M->assembled = PETSC_FALSE; 3193 } 3194 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3195 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3196 ii = aij->i; 3197 jj = aij->j; 3198 aa = aij->a; 3199 for (i=0; i<m; i++) { 3200 row = rstart + i; 3201 nz = ii[i+1] - ii[i]; 3202 cwork = jj; jj += nz; 3203 vwork = aa; aa += nz; 3204 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3205 } 3206 3207 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3208 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3209 *newmat = M; 3210 3211 /* save submatrix used in processor for next request */ 3212 if (call == MAT_INITIAL_MATRIX) { 3213 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3214 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3215 } 3216 PetscFunctionReturn(0); 3217 } 3218 3219 #undef __FUNCT__ 3220 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3221 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3222 { 3223 PetscInt m,cstart, cend,j,nnz,i,d; 3224 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3225 const PetscInt *JJ; 3226 PetscScalar *values; 3227 PetscErrorCode ierr; 3228 3229 PetscFunctionBegin; 3230 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3231 3232 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3233 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3234 m = B->rmap->n; 3235 cstart = B->cmap->rstart; 3236 cend = B->cmap->rend; 3237 rstart = B->rmap->rstart; 3238 3239 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3240 3241 #if defined(PETSC_USE_DEBUGGING) 3242 for (i=0; i<m; i++) { 3243 nnz = Ii[i+1]- Ii[i]; 3244 JJ = J + Ii[i]; 3245 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3246 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3247 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3248 } 3249 #endif 3250 3251 for (i=0; i<m; i++) { 3252 nnz = Ii[i+1]- Ii[i]; 3253 JJ = J + Ii[i]; 3254 nnz_max = PetscMax(nnz_max,nnz); 3255 d = 0; 3256 for (j=0; j<nnz; j++) { 3257 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3258 } 3259 d_nnz[i] = d; 3260 o_nnz[i] = nnz - d; 3261 } 3262 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3263 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3264 3265 if (v) values = (PetscScalar*)v; 3266 else { 3267 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3268 } 3269 3270 for (i=0; i<m; i++) { 3271 ii = i + rstart; 3272 nnz = Ii[i+1]- Ii[i]; 3273 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square.  The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
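   As a hedged illustration (using the per-row counts worked out further below),
   a minimal preallocation sketch on proc0 of this example, where B is assumed
   to be an already created 8x8 MATMPIAIJ with 3 local rows:

.vb
      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve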
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. That is, we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
    The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
    thus you CANNOT change the matrix entries by changing the values of a[] after you have
    called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

    The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
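    As a hedged illustration, the call made on process 0 of the two-process
    layout shown below (the i, j, and v arrays are the ones listed for P0;
    the variable names are only for this sketch):

.vb
      Mat         A;
      PetscInt    i[] = {0,1,3},j[] = {0,0,2};
      PetscScalar v[] = {1,2,3};
      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve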

    The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJ"
/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure.
           The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,.. are the input parameter 'm'. That is, each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. That is, the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.
   This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. That is, we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
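   Putting this example together, a hedged sketch of the call made on proc0
   (each process passes its own local sizes and per-row counts):

.vb
      Mat      A;
      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve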
3722 3723 Level: intermediate 3724 3725 .keywords: matrix, aij, compressed row, sparse, parallel 3726 3727 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3728 MPIAIJ, MatCreateMPIAIJWithArrays() 3729 @*/ 3730 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3731 { 3732 PetscErrorCode ierr; 3733 PetscMPIInt size; 3734 3735 PetscFunctionBegin; 3736 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3737 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3738 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3739 if (size > 1) { 3740 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3741 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3742 } else { 3743 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3744 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3745 } 3746 PetscFunctionReturn(0); 3747 } 3748 3749 #undef __FUNCT__ 3750 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3751 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3752 { 3753 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3754 PetscBool flg; 3755 PetscErrorCode ierr; 3756 3757 PetscFunctionBegin; 3758 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 3759 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MPIAIJ matrix as input"); 3760 if (Ad) *Ad = a->A; 3761 if (Ao) *Ao = a->B; 3762 if (colmap) *colmap = a->garray; 3763 PetscFunctionReturn(0); 3764 } 3765 3766 #undef __FUNCT__ 3767 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3768 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3769 { 3770 PetscErrorCode ierr; 3771 PetscInt m,N,i,rstart,nnz,Ii; 3772 PetscInt *indx; 3773 PetscScalar *values; 3774 3775 PetscFunctionBegin; 3776 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3777 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3778 PetscInt *dnz,*onz,sum,bs,cbs; 3779 3780 if (n == PETSC_DECIDE) { 3781 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3782 } 3783 /* Check sum(n) = N */ 3784 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3785 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3786 3787 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3788 rstart -= m; 3789 3790 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3791 for (i=0; i<m; i++) { 3792 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3793 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3794 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3795 } 3796 3797 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3798 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3799 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3800 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3801 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3802 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3803 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3804 } 3805 3806 /* numeric phase */ 3807 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3808 for (i=0; i<m; i++) { 3809 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3810 Ii = i + rstart; 3811 ierr 
= MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3812 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3813 } 3814 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3815 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3816 PetscFunctionReturn(0); 3817 } 3818 3819 #undef __FUNCT__ 3820 #define __FUNCT__ "MatFileSplit" 3821 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3822 { 3823 PetscErrorCode ierr; 3824 PetscMPIInt rank; 3825 PetscInt m,N,i,rstart,nnz; 3826 size_t len; 3827 const PetscInt *indx; 3828 PetscViewer out; 3829 char *name; 3830 Mat B; 3831 const PetscScalar *values; 3832 3833 PetscFunctionBegin; 3834 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3835 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3836 /* Should this be the type of the diagonal block of A? */ 3837 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3838 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3839 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3840 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3841 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3842 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3843 for (i=0; i<m; i++) { 3844 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3845 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3846 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3847 } 3848 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3849 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3850 3851 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3852 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3853 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3854 sprintf(name,"%s.%d",outfile,rank); 3855 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3856 ierr = PetscFree(name);CHKERRQ(ierr); 3857 ierr = MatView(B,out);CHKERRQ(ierr); 3858 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3859 ierr = MatDestroy(&B);CHKERRQ(ierr); 3860 PetscFunctionReturn(0); 3861 } 3862 3863 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3864 #undef __FUNCT__ 3865 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3866 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3867 { 3868 PetscErrorCode ierr; 3869 Mat_Merge_SeqsToMPI *merge; 3870 PetscContainer container; 3871 3872 PetscFunctionBegin; 3873 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3874 if (container) { 3875 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3876 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3877 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3878 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3879 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3880 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3881 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3882 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3883 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3884 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3885 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3886 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3887 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3888 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3889 ierr = PetscFree(merge);CHKERRQ(ierr); 3890 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3891 } 3892 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3893 PetscFunctionReturn(0); 3894 } 3895 3896 
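/*
  The routines below merge per-process SeqAIJ matrices into a single MPIAIJ
  matrix, split into a symbolic phase (communicate and merge the nonzero
  structure) and a numeric phase (communicate and add the values). A hedged
  usage sketch of the user-facing entry point (documented below as
  MatCreateMPIAIJSumSeqAIJ()); seqmat is assumed to be an assembled SeqAIJ
  matrix with the same global dimensions on every process:

.vb
    Mat mpimat;
    ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve
*/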
#include <../src/mat/utils/freespace.h> 3897 #include <petscbt.h> 3898 3899 #undef __FUNCT__ 3900 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3901 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3902 { 3903 PetscErrorCode ierr; 3904 MPI_Comm comm; 3905 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3906 PetscMPIInt size,rank,taga,*len_s; 3907 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3908 PetscInt proc,m; 3909 PetscInt **buf_ri,**buf_rj; 3910 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3911 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3912 MPI_Request *s_waits,*r_waits; 3913 MPI_Status *status; 3914 MatScalar *aa=a->a; 3915 MatScalar **abuf_r,*ba_i; 3916 Mat_Merge_SeqsToMPI *merge; 3917 PetscContainer container; 3918 3919 PetscFunctionBegin; 3920 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3921 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3922 3923 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3924 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3925 3926 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3927 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3928 3929 bi = merge->bi; 3930 bj = merge->bj; 3931 buf_ri = merge->buf_ri; 3932 buf_rj = merge->buf_rj; 3933 3934 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 3935 owners = merge->rowmap->range; 3936 len_s = merge->len_s; 3937 3938 /* send and recv matrix values */ 3939 /*-----------------------------*/ 3940 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3941 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3942 3943 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 3944 for (proc=0,k=0; proc<size; proc++) { 3945 if (!len_s[proc]) continue; 3946 i = owners[proc]; 3947 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3948 k++; 3949 } 3950 3951 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3952 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3953 ierr = PetscFree(status);CHKERRQ(ierr); 3954 3955 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3956 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3957 3958 /* insert mat values of mpimat */ 3959 /*----------------------------*/ 3960 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 3961 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 3962 3963 for (k=0; k<merge->nrecv; k++) { 3964 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3965 nrows = *(buf_ri_k[k]); 3966 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3967 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 3968 } 3969 3970 /* set values of ba */ 3971 m = merge->rowmap->n; 3972 for (i=0; i<m; i++) { 3973 arow = owners[rank] + i; 3974 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3975 bnzi = bi[i+1] - bi[i]; 3976 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 3977 3978 /* add local non-zero vals of this proc's seqmat into ba */ 3979 anzi = ai[arow+1] - ai[arow]; 3980 aj = a->j + ai[arow]; 3981 aa = a->a + ai[arow]; 3982 nextaj = 0; 3983 for (j=0; nextaj<anzi; j++) { 3984 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 3985 ba_i[j] += aa[nextaj++]; 3986 } 3987 } 3988 3989 /* add received 
vals into ba */ 3990 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 3991 /* i-th row */ 3992 if (i == *nextrow[k]) { 3993 anzi = *(nextai[k]+1) - *nextai[k]; 3994 aj = buf_rj[k] + *(nextai[k]); 3995 aa = abuf_r[k] + *(nextai[k]); 3996 nextaj = 0; 3997 for (j=0; nextaj<anzi; j++) { 3998 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 3999 ba_i[j] += aa[nextaj++]; 4000 } 4001 } 4002 nextrow[k]++; nextai[k]++; 4003 } 4004 } 4005 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4006 } 4007 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4008 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4009 4010 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4011 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4012 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4013 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4014 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4015 PetscFunctionReturn(0); 4016 } 4017 4018 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4019 4020 #undef __FUNCT__ 4021 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4022 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4023 { 4024 PetscErrorCode ierr; 4025 Mat B_mpi; 4026 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4027 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4028 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4029 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4030 PetscInt len,proc,*dnz,*onz,bs,cbs; 4031 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4032 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4033 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4034 MPI_Status *status; 4035 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4036 PetscBT lnkbt; 4037 Mat_Merge_SeqsToMPI *merge; 4038 PetscContainer container; 4039 4040 PetscFunctionBegin; 4041 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4042 4043 /* make sure it is a PETSc comm */ 4044 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4045 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4046 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4047 4048 ierr = PetscNew(&merge);CHKERRQ(ierr); 4049 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4050 4051 /* determine row ownership */ 4052 /*---------------------------------------------------------*/ 4053 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4054 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4055 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4056 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4057 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4058 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4059 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4060 4061 m = merge->rowmap->n; 4062 owners = merge->rowmap->range; 4063 4064 /* determine the number of messages to send, their lengths */ 4065 /*---------------------------------------------------------*/ 4066 len_s = merge->len_s; 4067 4068 len = 0; /* length of buf_si[] */ 4069 merge->nsend = 0; 4070 for (proc=0; proc<size; proc++) { 4071 len_si[proc] = 0; 4072 if (proc == rank) { 4073 len_s[proc] = 0; 4074 } else { 4075 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4076 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4077 } 4078 if (len_s[proc]) { 4079 merge->nsend++; 4080 nrows = 0; 4081 for 
(i=owners[proc]; i<owners[proc+1]; i++) { 4082 if (ai[i+1] > ai[i]) nrows++; 4083 } 4084 len_si[proc] = 2*(nrows+1); 4085 len += len_si[proc]; 4086 } 4087 } 4088 4089 /* determine the number and length of messages to receive for ij-structure */ 4090 /*-------------------------------------------------------------------------*/ 4091 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4092 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4093 4094 /* post the Irecv of j-structure */ 4095 /*-------------------------------*/ 4096 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4097 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4098 4099 /* post the Isend of j-structure */ 4100 /*--------------------------------*/ 4101 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4102 4103 for (proc=0, k=0; proc<size; proc++) { 4104 if (!len_s[proc]) continue; 4105 i = owners[proc]; 4106 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4107 k++; 4108 } 4109 4110 /* receives and sends of j-structure are complete */ 4111 /*------------------------------------------------*/ 4112 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4113 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4114 4115 /* send and recv i-structure */ 4116 /*---------------------------*/ 4117 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4118 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4119 4120 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4121 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4122 for (proc=0,k=0; proc<size; proc++) { 4123 if (!len_s[proc]) continue; 4124 /* form outgoing message for i-structure: 4125 buf_si[0]: nrows to be sent 4126 [1:nrows]: row index (global) 4127 [nrows+1:2*nrows+1]: i-structure index 4128 */ 4129 /*-------------------------------------------*/ 4130 nrows = len_si[proc]/2 - 1; 4131 buf_si_i = buf_si + nrows+1; 4132 buf_si[0] = nrows; 4133 buf_si_i[0] = 0; 4134 nrows = 0; 4135 for (i=owners[proc]; i<owners[proc+1]; i++) { 4136 anzi = ai[i+1] - ai[i]; 4137 if (anzi) { 4138 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4139 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4140 nrows++; 4141 } 4142 } 4143 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4144 k++; 4145 buf_si += len_si[proc]; 4146 } 4147 4148 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4149 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4150 4151 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4152 for (i=0; i<merge->nrecv; i++) { 4153 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4154 } 4155 4156 ierr = PetscFree(len_si);CHKERRQ(ierr); 4157 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4158 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4159 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4160 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4161 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4162 ierr = PetscFree(status);CHKERRQ(ierr); 4163 4164 /* compute a local seq matrix in each processor */ 4165 
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready
for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);

  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix on each processor
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;
  MPI_Comm       comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    if (size == 1) {
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }

    ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary.
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
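/*
   Usage sketch (editorial illustration, not part of the original source): with
   NULL row/col arguments the routine builds the index sets itself, keeping
   only the columns that actually contain nonzeros.

      Mat A_loc;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
*/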
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
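/*
   Usage sketch (editorial illustration, not part of the original source):
   rowb/colb are created on the MAT_INITIAL_MATRIX call and must be passed
   back in, together with B_seq, on later MAT_REUSE_MATRIX calls.

      IS  rowb = NULL,colb = NULL;
      Mat B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ... A and B receive new numerical values ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
*/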
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx =a->Mvctx;
  MPI_Comm               comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = NULL,*swaits = NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj,size;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices; /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    k           = 0;
    for (i=0; i<nsends; i++) {
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }
    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value; only MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX are supported");
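  /* Note (editorial): the exchange runs as three passes over the same scatter
     pattern - row lengths (i-array), column indices (j-array), and finally the
     numerical values (a-array) below. Only the a-array pass is repeated for
     MAT_REUSE_MATRIX, which is why sstartsj, rstartsj, and bufa are saved. */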
  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the send buffer itself, not the (possibly NULL) bufa_ptr argument */
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameters:
. A - The matrix in mpiaij format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
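/*
   Usage sketch (editorial illustration, not part of the original source):
   inspect the scatter that MatMult() uses for an MPIAIJ matrix.

      Vec        lvec;
      VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
   #else
      PetscInt   *colmap;
   #endif
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/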
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing A*B directly is untenable

            n                p                p
         (     )          (     )          (     )
       m (  A  )   *    n (  B  )   =    m (  C  )
         (     )          (     )          (     )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}
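/*
   Usage sketch (editorial illustration, not part of the original source; these
   are internal routines, normally reached through MatMatMult()): the symbolic
   phase builds the dense product matrix once, and the numeric phase can then
   be repeated as the operands change values.

      Mat C;
      ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,C);CHKERRQ(ierr);
*/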
PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 4998 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 4999 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5000 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5001 PetscFunctionReturn(0); 5002 } 5003 5004 #undef __FUNCT__ 5005 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5006 /*@C 5007 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5008 and "off-diagonal" part of the matrix in CSR format. 5009 5010 Collective on MPI_Comm 5011 5012 Input Parameters: 5013 + comm - MPI communicator 5014 . m - number of local rows (Cannot be PETSC_DECIDE) 5015 . n - This value should be the same as the local size used in creating the 5016 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5017 calculated if N is given) For square matrices n is almost always m. 5018 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5019 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5020 . i - row indices for "diagonal" portion of matrix 5021 . j - column indices 5022 . a - matrix values 5023 . oi - row indices for "off-diagonal" portion of matrix 5024 . oj - column indices 5025 - oa - matrix values 5026 5027 Output Parameter: 5028 . mat - the matrix 5029 5030 Level: advanced 5031 5032 Notes: 5033 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5034 must free the arrays once the matrix has been destroyed and not before. 5035 5036 The i and j indices are 0 based 5037 5038 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5039 5040 This sets local rows and cannot be used to set off-processor values. 5041 5042 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5043 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5044 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5045 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5046 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5047 communication if it is known that only local entries will be set. 

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
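/*
   Note (editorial): the routine below duplicates the insertion logic of
   MatSetValues_MPIAIJ() as a void function that Fortran can call directly,
   avoiding the error-code plumbing of the C interface; that is why the error
   macros are redefined above to abort instead of returning an error code.
*/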
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a; /* refresh ba before recomputing ap2; the old value array is stale after MatDisAssemble_MPIAIJ() */
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}