#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
   enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
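/*
   Illustrative usage sketch (not part of the library source): MatDiagonalSet() is the
   user-facing call that dispatches here for MPIAIJ matrices.  The fast path below applies
   only when the matrix is assembled and its row and column ownership ranges coincide;
   otherwise the generic MatDiagonalSet_Default() path is taken.  A caller might do
   something like the following (variable names are ours, assuming A is an assembled
   square MPIAIJ matrix):

     Vec D;
     ierr = MatCreateVecs(A,NULL,&D);CHKERRQ(ierr);
     ierr = VecSet(D,1.0);CHKERRQ(ierr);
     ierr = MatDiagonalSet(A,D,ADD_VALUES);CHKERRQ(ierr);   /* shift the diagonal by 1 */
     ierr = VecDestroy(&D);CHKERRQ(ierr);
*/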
111 { 112 PetscErrorCode ierr; 113 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 114 115 PetscFunctionBegin; 116 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 117 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 118 } else { 119 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 120 } 121 PetscFunctionReturn(0); 122 } 123 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 128 { 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 130 PetscErrorCode ierr; 131 PetscInt i,rstart,nrows,*rows; 132 133 PetscFunctionBegin; 134 *zrows = NULL; 135 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 136 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 137 for (i=0; i<nrows; i++) rows[i] += rstart; 138 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 145 { 146 PetscErrorCode ierr; 147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 148 PetscInt i,n,*garray = aij->garray; 149 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 150 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 151 PetscReal *work; 152 153 PetscFunctionBegin; 154 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 155 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 156 if (type == NORM_2) { 157 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 158 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 159 } 160 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 161 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 162 } 163 } else if (type == NORM_1) { 164 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 165 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 166 } 167 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 168 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 169 } 170 } else if (type == NORM_INFINITY) { 171 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 172 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 173 } 174 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 175 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 176 } 177 178 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 179 if (type == NORM_INFINITY) { 180 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 181 } else { 182 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 183 } 184 ierr = PetscFree(work);CHKERRQ(ierr); 185 if (type == NORM_2) { 186 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 187 } 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ" 193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 194 { 195 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 196 IS sis,gis; 197 PetscErrorCode ierr; 198 const PetscInt *isis,*igis; 199 PetscInt n,*iis,nsis,ngis,rstart,i; 200 201 PetscFunctionBegin; 202 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 203 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 204 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 205 ierr = 
ISGetSize(sis,&nsis);CHKERRQ(ierr); 206 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 207 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 208 209 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 210 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 211 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 212 n = ngis + nsis; 213 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 214 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 215 for (i=0; i<n; i++) iis[i] += rstart; 216 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 217 218 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 219 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 220 ierr = ISDestroy(&sis);CHKERRQ(ierr); 221 ierr = ISDestroy(&gis);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatDistribute_MPIAIJ" 227 /* 228 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 229 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 230 231 Only for square matrices 232 233 Used by a preconditioner, hence PETSC_EXTERN 234 */ 235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 236 { 237 PetscMPIInt rank,size; 238 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 239 PetscErrorCode ierr; 240 Mat mat; 241 Mat_SeqAIJ *gmata; 242 PetscMPIInt tag; 243 MPI_Status status; 244 PetscBool aij; 245 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 246 247 PetscFunctionBegin; 248 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 249 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 250 if (!rank) { 251 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 252 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 253 } 254 if (reuse == MAT_INITIAL_MATRIX) { 255 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 256 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 257 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 258 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 259 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 260 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 261 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 262 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 263 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 264 265 rowners[0] = 0; 266 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 267 rstart = rowners[rank]; 268 rend = rowners[rank+1]; 269 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 270 if (!rank) { 271 gmata = (Mat_SeqAIJ*) gmat->data; 272 /* send row lengths to all processors */ 273 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 274 for (i=1; i<size; i++) { 275 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 276 } 277 /* determine number diagonal and off-diagonal counts */ 278 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 279 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 280 jj = 0; 281 for (i=0; i<m; i++) { 282 for (j=0; j<dlens[i]; j++) { 283 if (gmata->j[jj] < rstart) ld[i]++; 284 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 285 jj++; 286 } 287 } 288 /* send column indices to other processes */ 289 for (i=1; i<size; i++) { 290 nz = 
gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 291 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 292 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 } 294 295 /* send numerical values to other processes */ 296 for (i=1; i<size; i++) { 297 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 298 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 299 } 300 gmataa = gmata->a; 301 gmataj = gmata->j; 302 303 } else { 304 /* receive row lengths */ 305 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 306 /* receive column indices */ 307 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 308 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 309 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* determine number diagonal and off-diagonal counts */ 311 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 312 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 313 jj = 0; 314 for (i=0; i<m; i++) { 315 for (j=0; j<dlens[i]; j++) { 316 if (gmataj[jj] < rstart) ld[i]++; 317 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 318 jj++; 319 } 320 } 321 /* receive numerical values */ 322 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* set preallocation */ 326 for (i=0; i<m; i++) { 327 dlens[i] -= olens[i]; 328 } 329 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 330 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 331 332 for (i=0; i<m; i++) { 333 dlens[i] += olens[i]; 334 } 335 cnt = 0; 336 for (i=0; i<m; i++) { 337 row = rstart + i; 338 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 339 cnt += dlens[i]; 340 } 341 if (rank) { 342 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 343 } 344 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 345 ierr = PetscFree(rowners);CHKERRQ(ierr); 346 347 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 348 349 *inmat = mat; 350 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 351 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 352 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 353 mat = *inmat; 354 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 355 if (!rank) { 356 /* send numerical values to other processes */ 357 gmata = (Mat_SeqAIJ*) gmat->data; 358 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 359 gmataa = gmata->a; 360 for (i=1; i<size; i++) { 361 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 362 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 363 } 364 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 365 } else { 366 /* receive numerical values from process 0*/ 367 nz = Ad->nz + Ao->nz; 368 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 369 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 370 } 371 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 372 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 373 ad = Ad->a; 374 ao = Ao->a; 375 if (mat->rmap->n) { 376 i = 0; 377 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 378 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; 
gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
474 if (rp2[_i] > col) break; \ 475 if (rp2[_i] == col) { \ 476 if (addv == ADD_VALUES) ap2[_i] += value; \ 477 else ap2[_i] = value; \ 478 goto b_noinsert; \ 479 } \ 480 } \ 481 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 482 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 484 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 485 N = nrow2++ - 1; b->nz++; high2++; \ 486 /* shift up all the later entries in this row */ \ 487 for (ii=N; ii>=_i; ii--) { \ 488 rp2[ii+1] = rp2[ii]; \ 489 ap2[ii+1] = ap2[ii]; \ 490 } \ 491 rp2[_i] = col; \ 492 ap2[_i] = value; \ 493 B->nonzerostate++; \ 494 b_noinsert: ; \ 495 bilen[row] = nrow2; \ 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 501 { 502 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 503 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 504 PetscErrorCode ierr; 505 PetscInt l,*garray = mat->garray,diag; 506 507 PetscFunctionBegin; 508 /* code only works for square matrices A */ 509 510 /* find size of row to the left of the diagonal part */ 511 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 512 row = row - diag; 513 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 514 if (garray[b->j[b->i[row]+l]] > diag) break; 515 } 516 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 517 518 /* diagonal part */ 519 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 520 521 /* right of diagonal part */ 522 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatSetValues_MPIAIJ" 528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 529 { 530 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 531 PetscScalar value; 532 PetscErrorCode ierr; 533 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 540 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 541 MatScalar *aa = a->a; 542 PetscBool ignorezeroentries = a->ignorezeroentries; 543 Mat B = aij->B; 544 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 545 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 546 MatScalar *ba = b->a; 547 548 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 549 PetscInt nonew; 550 MatScalar *ap1,*ap2; 551 552 PetscFunctionBegin; 553 for (i=0; i<m; i++) { 554 if (im[i] < 0) continue; 555 #if defined(PETSC_USE_DEBUG) 556 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 557 #endif 558 if (im[i] >= rstart && im[i] < rend) { 559 row = im[i] - rstart; 560 lastcol1 = -1; 561 rp1 = aj + ai[row]; 562 ap1 = aa + ai[row]; 563 rmax1 = aimax[row]; 564 nrow1 = ailen[row]; 565 low1 = 0; 566 
high1 = nrow1; 567 lastcol2 = -1; 568 rp2 = bj + bi[row]; 569 ap2 = ba + bi[row]; 570 rmax2 = bimax[row]; 571 nrow2 = bilen[row]; 572 low2 = 0; 573 high2 = nrow2; 574 575 for (j=0; j<n; j++) { 576 if (roworiented) value = v[i*n+j]; 577 else value = v[i+j*m]; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 579 if (in[j] >= cstart && in[j] < cend) { 580 col = in[j] - cstart; 581 nonew = a->nonew; 582 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 583 } else if (in[j] < 0) continue; 584 #if defined(PETSC_USE_DEBUG) 585 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 586 #endif 587 else { 588 if (mat->was_assembled) { 589 if (!aij->colmap) { 590 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 591 } 592 #if defined(PETSC_USE_CTABLE) 593 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 594 col--; 595 #else 596 col = aij->colmap[in[j]] - 1; 597 #endif 598 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 599 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 600 col = in[j]; 601 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 602 B = aij->B; 603 b = (Mat_SeqAIJ*)B->data; 604 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 605 rp2 = bj + bi[row]; 606 ap2 = ba + bi[row]; 607 rmax2 = bimax[row]; 608 nrow2 = bilen[row]; 609 low2 = 0; 610 high2 = nrow2; 611 bm = aij->B->rmap->n; 612 ba = b->a; 613 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } else col = in[j]; 615 nonew = b->nonew; 616 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 617 } 618 } 619 } else { 620 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 621 if (!aij->donotstash) { 622 mat->assembled = PETSC_FALSE; 623 if (roworiented) { 624 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 625 } else { 626 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 627 } 628 } 629 } 630 } 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatGetValues_MPIAIJ" 636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 641 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 642 643 PetscFunctionBegin; 644 for (i=0; i<m; i++) { 645 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 646 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 647 if (idxm[i] >= rstart && idxm[i] < rend) { 648 row = idxm[i] - rstart; 649 for (j=0; j<n; j++) { 650 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 651 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 652 if (idxn[j] >= cstart && idxn[j] < cend) 
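/* idxn[j] lies in this process's owned column range [cstart,cend): the value is read from the diagonal block aij->A; otherwise it is looked up in the off-diagonal block aij->B through the column map */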
{ 653 col = idxn[j] - cstart; 654 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 655 } else { 656 if (!aij->colmap) { 657 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 658 } 659 #if defined(PETSC_USE_CTABLE) 660 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 661 col--; 662 #else 663 col = aij->colmap[idxn[j]] - 1; 664 #endif 665 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 666 else { 667 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 668 } 669 } 670 } 671 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 672 } 673 PetscFunctionReturn(0); 674 } 675 676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 681 { 682 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 683 PetscErrorCode ierr; 684 PetscInt nstash,reallocs; 685 686 PetscFunctionBegin; 687 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 688 689 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 690 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 691 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 698 { 699 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 700 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 701 PetscErrorCode ierr; 702 PetscMPIInt n; 703 PetscInt i,j,rstart,ncols,flg; 704 PetscInt *row,*col; 705 PetscBool other_disassembled; 706 PetscScalar *val; 707 708 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 709 710 PetscFunctionBegin; 711 if (!aij->donotstash && !mat->nooffprocentries) { 712 while (1) { 713 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 714 if (!flg) break; 715 716 for (i=0; i<n; ) { 717 /* Now identify the consecutive vals belonging to the same row */ 718 for (j=i,rstart=row[j]; j<n; j++) { 719 if (row[j] != rstart) break; 720 } 721 if (j < n) ncols = j-i; 722 else ncols = n-i; 723 /* Now assemble all these values with a single function call */ 724 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 725 726 i = j; 727 } 728 } 729 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 730 } 731 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 733 734 /* determine if any processor has disassembled, if so we must 735 also disassemble ourselfs, in order that we may reassemble. 
*/ 736 /* 737 if nonzero structure of submatrix B cannot change then we know that 738 no processor disassembled thus we can skip this stuff 739 */ 740 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 741 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 742 if (mat->was_assembled && !other_disassembled) { 743 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 744 } 745 } 746 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 747 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 748 } 749 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 750 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 752 753 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 754 755 aij->rowvalues = 0; 756 757 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 758 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 759 760 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 761 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 762 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 763 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 764 } 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 771 { 772 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 773 PetscErrorCode ierr; 774 775 PetscFunctionBegin; 776 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 777 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 778 PetscFunctionReturn(0); 779 } 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatZeroRows_MPIAIJ" 783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 784 { 785 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 786 PetscInt *lrows; 787 PetscInt r, len; 788 PetscErrorCode ierr; 789 790 PetscFunctionBegin; 791 /* get locally owned rows */ 792 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 793 /* fix right hand side if needed */ 794 if (x && b) { 795 const PetscScalar *xx; 796 PetscScalar *bb; 797 798 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 799 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 800 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 801 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 802 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 803 } 804 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 805 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 806 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 807 PetscBool cong; 808 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 809 if (cong) A->congruentlayouts = 1; 810 else A->congruentlayouts = 0; 811 } 812 if ((diag != 0.0) && A->congruentlayouts) { 813 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 814 } else if (diag != 0.0) { 815 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 816 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 817 for (r = 0; r < len; ++r) { 818 const 
PetscInt row = lrows[r] + A->rmap->rstart; 819 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 820 } 821 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 822 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 823 } else { 824 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 825 } 826 ierr = PetscFree(lrows);CHKERRQ(ierr); 827 828 /* only change matrix nonzero state if pattern was allowed to be changed */ 829 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 830 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 831 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 832 } 833 PetscFunctionReturn(0); 834 } 835 836 #undef __FUNCT__ 837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 839 { 840 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 841 PetscErrorCode ierr; 842 PetscMPIInt n = A->rmap->n; 843 PetscInt i,j,r,m,p = 0,len = 0; 844 PetscInt *lrows,*owners = A->rmap->range; 845 PetscSFNode *rrows; 846 PetscSF sf; 847 const PetscScalar *xx; 848 PetscScalar *bb,*mask; 849 Vec xmask,lmask; 850 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 851 const PetscInt *aj, *ii,*ridx; 852 PetscScalar *aa; 853 854 PetscFunctionBegin; 855 /* Create SF where leaves are input rows and roots are owned rows */ 856 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 857 for (r = 0; r < n; ++r) lrows[r] = -1; 858 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 859 for (r = 0; r < N; ++r) { 860 const PetscInt idx = rows[r]; 861 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 862 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 863 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 864 } 865 rrows[r].rank = p; 866 rrows[r].index = rows[r] - owners[p]; 867 } 868 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 869 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 870 /* Collect flags for rows to be zeroed */ 871 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 872 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 873 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 874 /* Compress and put in row numbers */ 875 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 876 /* zero diagonal part of matrix */ 877 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 878 /* handle off diagonal part of matrix */ 879 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 880 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 881 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 882 for (i=0; i<len; i++) bb[lrows[i]] = 1; 883 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 884 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 887 if (x) { 888 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 891 ierr = 
VecGetArray(b,&bb);CHKERRQ(ierr); 892 } 893 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 894 /* remove zeroed rows of off diagonal matrix */ 895 ii = aij->i; 896 for (i=0; i<len; i++) { 897 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 898 } 899 /* loop over all elements of off process part of matrix zeroing removed columns*/ 900 if (aij->compressedrow.use) { 901 m = aij->compressedrow.nrows; 902 ii = aij->compressedrow.i; 903 ridx = aij->compressedrow.rindex; 904 for (i=0; i<m; i++) { 905 n = ii[i+1] - ii[i]; 906 aj = aij->j + ii[i]; 907 aa = aij->a + ii[i]; 908 909 for (j=0; j<n; j++) { 910 if (PetscAbsScalar(mask[*aj])) { 911 if (b) bb[*ridx] -= *aa*xx[*aj]; 912 *aa = 0.0; 913 } 914 aa++; 915 aj++; 916 } 917 ridx++; 918 } 919 } else { /* do not use compressed row format */ 920 m = l->B->rmap->n; 921 for (i=0; i<m; i++) { 922 n = ii[i+1] - ii[i]; 923 aj = aij->j + ii[i]; 924 aa = aij->a + ii[i]; 925 for (j=0; j<n; j++) { 926 if (PetscAbsScalar(mask[*aj])) { 927 if (b) bb[i] -= *aa*xx[*aj]; 928 *aa = 0.0; 929 } 930 aa++; 931 aj++; 932 } 933 } 934 } 935 if (x) { 936 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 937 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 938 } 939 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 940 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 941 ierr = PetscFree(lrows);CHKERRQ(ierr); 942 943 /* only change matrix nonzero state if pattern was allowed to be changed */ 944 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 945 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 946 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 947 } 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatMult_MPIAIJ" 953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 954 { 955 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 956 PetscErrorCode ierr; 957 PetscInt nt; 958 959 PetscFunctionBegin; 960 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 961 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 962 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 964 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 965 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 972 { 973 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 974 PetscErrorCode ierr; 975 976 PetscFunctionBegin; 977 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatMultAdd_MPIAIJ" 983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 984 { 985 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 986 PetscErrorCode ierr; 987 988 PetscFunctionBegin; 989 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 990 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 991 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 998 PetscErrorCode 
MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix.
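     Each process forms the index sets Me (its owned rows) and Notme (all other rows/columns),
     extracts the block A(Me,Notme) from Amat and the block B(Notme,Me) from Bmat with
     MatGetSubMatrices(), and then uses MatIsTranspose() to check that the two extracted
     blocks are transposes of each other within the given tolerance.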
*/ 1049 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1050 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1051 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1052 for (i=0; i<first; i++) notme[i] = i; 1053 for (i=last; i<M; i++) notme[i-last+first] = i; 1054 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1055 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1056 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1057 Aoff = Aoffs[0]; 1058 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1059 Boff = Boffs[0]; 1060 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1061 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1062 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1063 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1064 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1065 ierr = PetscFree(notme);CHKERRQ(ierr); 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1072 { 1073 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1074 PetscErrorCode ierr; 1075 1076 PetscFunctionBegin; 1077 /* do nondiagonal part */ 1078 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1079 /* send it on its way */ 1080 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1081 /* do local part */ 1082 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1083 /* receive remote parts */ 1084 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 /* 1089 This only works correctly for square matrices where the subblock A->A is the 1090 diagonal block 1091 */ 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1095 { 1096 PetscErrorCode ierr; 1097 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1098 1099 PetscFunctionBegin; 1100 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1101 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1102 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatScale_MPIAIJ" 1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1109 { 1110 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1111 PetscErrorCode ierr; 1112 1113 PetscFunctionBegin; 1114 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1115 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatDestroy_MPIAIJ" 1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1122 { 1123 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 #if defined(PETSC_USE_LOG) 1128 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1129 #endif 1130 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1131 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1132 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1133 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1134 #if defined(PETSC_USE_CTABLE) 1135 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1136 #else 1137 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1138 #endif 1139 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1140 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1141 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1142 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1143 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1144 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1145 1146 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1147 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1148 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1149 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1151 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1152 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1153 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1154 #if defined(PETSC_HAVE_ELEMENTAL) 1155 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1156 #endif 1157 #if defined(PETSC_HAVE_HYPRE) 1158 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1159 #endif 1160 PetscFunctionReturn(0); 1161 } 1162 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1165 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1166 { 1167 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1168 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1169 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1170 PetscErrorCode ierr; 1171 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1172 int fd; 1173 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1174 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1175 PetscScalar *column_values; 1176 PetscInt message_count,flowcontrolcount; 1177 FILE *file; 1178 1179 PetscFunctionBegin; 1180 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1181 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1182 nz = A->nz + B->nz; 1183 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1184 if (!rank) { 1185 header[0] = MAT_FILE_CLASSID; 1186 header[1] = mat->rmap->N; 1187 header[2] = mat->cmap->N; 1188 1189 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1190 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1191 /* get largest number of rows any processor has */ 1192 rlen = mat->rmap->n; 1193 range = mat->rmap->range; 1194 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1195 } else { 1196 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1197 rlen = mat->rmap->n; 1198 } 1199 1200 /* load up the local row counts */ 1201 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1202 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1203 1204 /* store the row lengths to the file */ 1205 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1206 if (!rank) { 1207 ierr = 
PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1208 for (i=1; i<size; i++) { 1209 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1210 rlen = range[i+1] - range[i]; 1211 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1212 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1213 } 1214 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1215 } else { 1216 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1217 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1218 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1219 } 1220 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1221 1222 /* load up the local column indices */ 1223 nzmax = nz; /* th processor needs space a largest processor needs */ 1224 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1225 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1226 cnt = 0; 1227 for (i=0; i<mat->rmap->n; i++) { 1228 for (j=B->i[i]; j<B->i[i+1]; j++) { 1229 if ((col = garray[B->j[j]]) > cstart) break; 1230 column_indices[cnt++] = col; 1231 } 1232 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1233 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1234 } 1235 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1236 1237 /* store the column indices to the file */ 1238 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1239 if (!rank) { 1240 MPI_Status status; 1241 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1242 for (i=1; i<size; i++) { 1243 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1244 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1245 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1246 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1247 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1248 } 1249 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1250 } else { 1251 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1252 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1253 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1254 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1255 } 1256 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1257 1258 /* load up the local column values */ 1259 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1260 cnt = 0; 1261 for (i=0; i<mat->rmap->n; i++) { 1262 for (j=B->i[i]; j<B->i[i+1]; j++) { 1263 if (garray[B->j[j]] > cstart) break; 1264 column_values[cnt++] = B->a[j]; 1265 } 1266 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1267 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1268 } 1269 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = 
%D",cnt,A->nz+B->nz); 1270 1271 /* store the column values to the file */ 1272 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1273 if (!rank) { 1274 MPI_Status status; 1275 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1276 for (i=1; i<size; i++) { 1277 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1278 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1279 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1280 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1281 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1282 } 1283 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1284 } else { 1285 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1286 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1287 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1288 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1289 } 1290 ierr = PetscFree(column_values);CHKERRQ(ierr); 1291 1292 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1293 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1294 PetscFunctionReturn(0); 1295 } 1296 1297 #include <petscdraw.h> 1298 #undef __FUNCT__ 1299 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1300 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1301 { 1302 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1303 PetscErrorCode ierr; 1304 PetscMPIInt rank = aij->rank,size = aij->size; 1305 PetscBool isdraw,iascii,isbinary; 1306 PetscViewer sviewer; 1307 PetscViewerFormat format; 1308 1309 PetscFunctionBegin; 1310 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1311 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1312 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1313 if (iascii) { 1314 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1315 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1316 MatInfo info; 1317 PetscBool inodes; 1318 1319 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1320 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1321 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1322 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1323 if (!inodes) { 1324 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1325 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1326 } else { 1327 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1328 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1329 } 1330 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1331 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1332 ierr = 
MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1333 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1334 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1335 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1336 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1337 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1338 PetscFunctionReturn(0); 1339 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1340 PetscInt inodecount,inodelimit,*inodes; 1341 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1342 if (inodes) { 1343 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1344 } else { 1345 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1346 } 1347 PetscFunctionReturn(0); 1348 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1349 PetscFunctionReturn(0); 1350 } 1351 } else if (isbinary) { 1352 if (size == 1) { 1353 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1354 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1355 } else { 1356 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1357 } 1358 PetscFunctionReturn(0); 1359 } else if (isdraw) { 1360 PetscDraw draw; 1361 PetscBool isnull; 1362 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1363 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1364 if (isnull) PetscFunctionReturn(0); 1365 } 1366 1367 { 1368 /* assemble the entire matrix onto first processor. */ 1369 Mat A; 1370 Mat_SeqAIJ *Aloc; 1371 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1372 MatScalar *a; 1373 1374 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1375 if (!rank) { 1376 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1377 } else { 1378 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1379 } 1380 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1381 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1382 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1383 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1384 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1385 1386 /* copy over the A part */ 1387 Aloc = (Mat_SeqAIJ*)aij->A->data; 1388 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1389 row = mat->rmap->rstart; 1390 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1391 for (i=0; i<m; i++) { 1392 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1393 row++; 1394 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1395 } 1396 aj = Aloc->j; 1397 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1398 1399 /* copy over the B part */ 1400 Aloc = (Mat_SeqAIJ*)aij->B->data; 1401 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1402 row = mat->rmap->rstart; 1403 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1404 ct = cols; 1405 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1406 for (i=0; i<m; i++) { 1407 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1408 row++; 1409 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1410 } 1411 ierr = PetscFree(ct);CHKERRQ(ierr); 1412 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1413 ierr = 
MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1414 /* 1415 Everyone has to call to draw the matrix since the graphics waits are 1416 synchronized across all processors that share the PetscDraw object 1417 */ 1418 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1419 if (!rank) { 1420 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1421 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1422 } 1423 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1424 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1425 ierr = MatDestroy(&A);CHKERRQ(ierr); 1426 } 1427 PetscFunctionReturn(0); 1428 } 1429 1430 #undef __FUNCT__ 1431 #define __FUNCT__ "MatView_MPIAIJ" 1432 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1433 { 1434 PetscErrorCode ierr; 1435 PetscBool iascii,isdraw,issocket,isbinary; 1436 1437 PetscFunctionBegin; 1438 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1439 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1440 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1441 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1442 if (iascii || isdraw || isbinary || issocket) { 1443 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1444 } 1445 PetscFunctionReturn(0); 1446 } 1447 1448 #undef __FUNCT__ 1449 #define __FUNCT__ "MatSOR_MPIAIJ" 1450 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1451 { 1452 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1453 PetscErrorCode ierr; 1454 Vec bb1 = 0; 1455 PetscBool hasop; 1456 1457 PetscFunctionBegin; 1458 if (flag == SOR_APPLY_UPPER) { 1459 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1460 PetscFunctionReturn(0); 1461 } 1462 1463 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1464 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1465 } 1466 1467 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1468 if (flag & SOR_ZERO_INITIAL_GUESS) { 1469 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1470 its--; 1471 } 1472 1473 while (its--) { 1474 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1475 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1476 1477 /* update rhs: bb1 = bb - B*x */ 1478 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1479 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1480 1481 /* local sweep */ 1482 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1483 } 1484 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1485 if (flag & SOR_ZERO_INITIAL_GUESS) { 1486 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1487 its--; 1488 } 1489 while (its--) { 1490 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1491 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1492 1493 /* update rhs: bb1 = bb - B*x */ 1494 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1495 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1496 1497 /* local sweep 
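                      here the forward sweep is applied to the diagonal block only, with the updated
                      right-hand side bb1 = bb - B*x computed above from the freshly scattered ghost
                      values; coupling to off-process unknowns enters solely through that right-hand side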
*/ 1498 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1499 } 1500 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1501 if (flag & SOR_ZERO_INITIAL_GUESS) { 1502 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1503 its--; 1504 } 1505 while (its--) { 1506 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1507 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1508 1509 /* update rhs: bb1 = bb - B*x */ 1510 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1511 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1512 1513 /* local sweep */ 1514 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1515 } 1516 } else if (flag & SOR_EISENSTAT) { 1517 Vec xx1; 1518 1519 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1520 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1521 1522 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1523 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1524 if (!mat->diag) { 1525 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1526 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1527 } 1528 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1529 if (hasop) { 1530 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1531 } else { 1532 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1533 } 1534 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1535 1536 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1537 1538 /* local sweep */ 1539 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1540 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1541 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1542 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1543 1544 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1545 1546 matin->factorerrortype = mat->A->factorerrortype; 1547 PetscFunctionReturn(0); 1548 } 1549 1550 #undef __FUNCT__ 1551 #define __FUNCT__ "MatPermute_MPIAIJ" 1552 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1553 { 1554 Mat aA,aB,Aperm; 1555 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1556 PetscScalar *aa,*ba; 1557 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1558 PetscSF rowsf,sf; 1559 IS parcolp = NULL; 1560 PetscBool done; 1561 PetscErrorCode ierr; 1562 1563 PetscFunctionBegin; 1564 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1565 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1566 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1567 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1568 1569 /* Invert row permutation to find out where my rows should go */ 1570 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1571 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1572 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1573 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1574 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1575 ierr = 
PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1576 1577 /* Invert column permutation to find out where my columns should go */ 1578 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1579 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1580 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1581 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1582 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1583 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1584 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1585 1586 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1587 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1588 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1589 1590 /* Find out where my gcols should go */ 1591 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1592 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1593 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1594 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1595 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1596 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1597 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1598 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1599 1600 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1601 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1602 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1603 for (i=0; i<m; i++) { 1604 PetscInt row = rdest[i],rowner; 1605 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1606 for (j=ai[i]; j<ai[i+1]; j++) { 1607 PetscInt cowner,col = cdest[aj[j]]; 1608 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1609 if (rowner == cowner) dnnz[i]++; 1610 else onnz[i]++; 1611 } 1612 for (j=bi[i]; j<bi[i+1]; j++) { 1613 PetscInt cowner,col = gcdest[bj[j]]; 1614 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1615 if (rowner == cowner) dnnz[i]++; 1616 else onnz[i]++; 1617 } 1618 } 1619 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1620 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1621 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1622 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1623 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1624 1625 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1626 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1627 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1628 for (i=0; i<m; i++) { 1629 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1630 PetscInt j0,rowlen; 1631 rowlen = ai[i+1] - ai[i]; 1632 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1633 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1634 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1635 } 1636 rowlen = bi[i+1] - bi[i]; 1637 for (j0=j=0; j<rowlen; j0=j) { 1638 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1639 ierr = 
MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1640 } 1641 } 1642 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1643 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1644 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1645 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1646 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1647 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1648 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1649 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1650 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1651 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1652 *B = Aperm; 1653 PetscFunctionReturn(0); 1654 } 1655 1656 #undef __FUNCT__ 1657 #define __FUNCT__ "MatGetGhosts_MPIAIJ" 1658 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1659 { 1660 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1661 PetscErrorCode ierr; 1662 1663 PetscFunctionBegin; 1664 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1665 if (ghosts) *ghosts = aij->garray; 1666 PetscFunctionReturn(0); 1667 } 1668 1669 #undef __FUNCT__ 1670 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1671 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1672 { 1673 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1674 Mat A = mat->A,B = mat->B; 1675 PetscErrorCode ierr; 1676 PetscReal isend[5],irecv[5]; 1677 1678 PetscFunctionBegin; 1679 info->block_size = 1.0; 1680 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1681 1682 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1683 isend[3] = info->memory; isend[4] = info->mallocs; 1684 1685 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1686 1687 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1688 isend[3] += info->memory; isend[4] += info->mallocs; 1689 if (flag == MAT_LOCAL) { 1690 info->nz_used = isend[0]; 1691 info->nz_allocated = isend[1]; 1692 info->nz_unneeded = isend[2]; 1693 info->memory = isend[3]; 1694 info->mallocs = isend[4]; 1695 } else if (flag == MAT_GLOBAL_MAX) { 1696 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1697 1698 info->nz_used = irecv[0]; 1699 info->nz_allocated = irecv[1]; 1700 info->nz_unneeded = irecv[2]; 1701 info->memory = irecv[3]; 1702 info->mallocs = irecv[4]; 1703 } else if (flag == MAT_GLOBAL_SUM) { 1704 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1705 1706 info->nz_used = irecv[0]; 1707 info->nz_allocated = irecv[1]; 1708 info->nz_unneeded = irecv[2]; 1709 info->memory = irecv[3]; 1710 info->mallocs = irecv[4]; 1711 } 1712 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1713 info->fill_ratio_needed = 0; 1714 info->factor_mallocs = 0; 1715 PetscFunctionReturn(0); 1716 } 1717 1718 #undef __FUNCT__ 1719 #define __FUNCT__ "MatSetOption_MPIAIJ" 1720 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1721 { 1722 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1723 PetscErrorCode ierr; 1724 1725 PetscFunctionBegin; 1726 switch (op) { 1727 case MAT_NEW_NONZERO_LOCATIONS: 1728 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1729 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1730 case MAT_KEEP_NONZERO_PATTERN: 1731 case MAT_NEW_NONZERO_LOCATION_ERR: 1732 case MAT_USE_INODES: 1733 case MAT_IGNORE_ZERO_ENTRIES: 1734 
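    /* these options need no parallel-specific handling; after checking preallocation they are
       simply forwarded to both sequential blocks, the diagonal part a->A and the off-diagonal part a->B */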
MatCheckPreallocated(A,1); 1735 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1736 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1737 break; 1738 case MAT_ROW_ORIENTED: 1739 MatCheckPreallocated(A,1); 1740 a->roworiented = flg; 1741 1742 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1743 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1744 break; 1745 case MAT_NEW_DIAGONALS: 1746 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1747 break; 1748 case MAT_IGNORE_OFF_PROC_ENTRIES: 1749 a->donotstash = flg; 1750 break; 1751 case MAT_SPD: 1752 A->spd_set = PETSC_TRUE; 1753 A->spd = flg; 1754 if (flg) { 1755 A->symmetric = PETSC_TRUE; 1756 A->structurally_symmetric = PETSC_TRUE; 1757 A->symmetric_set = PETSC_TRUE; 1758 A->structurally_symmetric_set = PETSC_TRUE; 1759 } 1760 break; 1761 case MAT_SYMMETRIC: 1762 MatCheckPreallocated(A,1); 1763 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1764 break; 1765 case MAT_STRUCTURALLY_SYMMETRIC: 1766 MatCheckPreallocated(A,1); 1767 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1768 break; 1769 case MAT_HERMITIAN: 1770 MatCheckPreallocated(A,1); 1771 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1772 break; 1773 case MAT_SYMMETRY_ETERNAL: 1774 MatCheckPreallocated(A,1); 1775 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1776 break; 1777 default: 1778 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1779 } 1780 PetscFunctionReturn(0); 1781 } 1782 1783 #undef __FUNCT__ 1784 #define __FUNCT__ "MatGetRow_MPIAIJ" 1785 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1786 { 1787 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1788 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1789 PetscErrorCode ierr; 1790 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1791 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1792 PetscInt *cmap,*idx_p; 1793 1794 PetscFunctionBegin; 1795 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1796 mat->getrowactive = PETSC_TRUE; 1797 1798 if (!mat->rowvalues && (idx || v)) { 1799 /* 1800 allocate enough space to hold information from the longest row. 
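       The workspace (mat->rowvalues and mat->rowindices) is sized by the largest combined number of
       nonzeros in the diagonal (A) and off-diagonal (B) parts over all local rows; it is allocated once
       here and reused by subsequent MatGetRow() calls on this matrix.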
1801 */ 1802 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1803 PetscInt max = 1,tmp; 1804 for (i=0; i<matin->rmap->n; i++) { 1805 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1806 if (max < tmp) max = tmp; 1807 } 1808 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1809 } 1810 1811 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1812 lrow = row - rstart; 1813 1814 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1815 if (!v) {pvA = 0; pvB = 0;} 1816 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1817 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1818 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1819 nztot = nzA + nzB; 1820 1821 cmap = mat->garray; 1822 if (v || idx) { 1823 if (nztot) { 1824 /* Sort by increasing column numbers, assuming A and B already sorted */ 1825 PetscInt imark = -1; 1826 if (v) { 1827 *v = v_p = mat->rowvalues; 1828 for (i=0; i<nzB; i++) { 1829 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1830 else break; 1831 } 1832 imark = i; 1833 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1834 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1835 } 1836 if (idx) { 1837 *idx = idx_p = mat->rowindices; 1838 if (imark > -1) { 1839 for (i=0; i<imark; i++) { 1840 idx_p[i] = cmap[cworkB[i]]; 1841 } 1842 } else { 1843 for (i=0; i<nzB; i++) { 1844 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1845 else break; 1846 } 1847 imark = i; 1848 } 1849 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1850 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1851 } 1852 } else { 1853 if (idx) *idx = 0; 1854 if (v) *v = 0; 1855 } 1856 } 1857 *nz = nztot; 1858 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1859 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1860 PetscFunctionReturn(0); 1861 } 1862 1863 #undef __FUNCT__ 1864 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1865 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1866 { 1867 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1868 1869 PetscFunctionBegin; 1870 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1871 aij->getrowactive = PETSC_FALSE; 1872 PetscFunctionReturn(0); 1873 } 1874 1875 #undef __FUNCT__ 1876 #define __FUNCT__ "MatNorm_MPIAIJ" 1877 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1878 { 1879 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1880 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1881 PetscErrorCode ierr; 1882 PetscInt i,j,cstart = mat->cmap->rstart; 1883 PetscReal sum = 0.0; 1884 MatScalar *v; 1885 1886 PetscFunctionBegin; 1887 if (aij->size == 1) { 1888 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1889 } else { 1890 if (type == NORM_FROBENIUS) { 1891 v = amat->a; 1892 for (i=0; i<amat->nz; i++) { 1893 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1894 } 1895 v = bmat->a; 1896 for (i=0; i<bmat->nz; i++) { 1897 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1898 } 1899 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1900 *norm = PetscSqrtReal(*norm); 1901 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1902 } else if (type == NORM_1) { /* max column norm */ 1903 PetscReal *tmp,*tmp2; 1904 PetscInt *jj,*garray = aij->garray; 1905 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1906 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1907 *norm = 0.0; 1908 v = amat->a; jj = amat->j; 1909 for (j=0; j<amat->nz; j++) { 1910 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1911 } 1912 v = bmat->a; jj = bmat->j; 1913 for (j=0; j<bmat->nz; j++) { 1914 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1915 } 1916 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1917 for (j=0; j<mat->cmap->N; j++) { 1918 if (tmp2[j] > *norm) *norm = tmp2[j]; 1919 } 1920 ierr = PetscFree(tmp);CHKERRQ(ierr); 1921 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1922 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1923 } else if (type == NORM_INFINITY) { /* max row norm */ 1924 PetscReal ntemp = 0.0; 1925 for (j=0; j<aij->A->rmap->n; j++) { 1926 v = amat->a + amat->i[j]; 1927 sum = 0.0; 1928 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1929 sum += PetscAbsScalar(*v); v++; 1930 } 1931 v = bmat->a + bmat->i[j]; 1932 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1933 sum += PetscAbsScalar(*v); v++; 1934 } 1935 if (sum > ntemp) ntemp = sum; 1936 } 1937 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1938 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1939 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1940 } 1941 PetscFunctionReturn(0); 1942 } 1943 1944 #undef __FUNCT__ 1945 #define __FUNCT__ "MatTranspose_MPIAIJ" 1946 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1947 { 1948 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1949 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1950 PetscErrorCode ierr; 1951 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1952 PetscInt cstart = A->cmap->rstart,ncol; 1953 Mat B; 1954 MatScalar *array; 1955 1956 PetscFunctionBegin; 1957 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1958 1959 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1960 ai = Aloc->i; aj = Aloc->j; 1961 bi = Bloc->i; bj = Bloc->j; 1962 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1963 PetscInt *d_nnz,*g_nnz,*o_nnz; 1964 PetscSFNode *oloc; 1965 PETSC_UNUSED PetscSF sf; 1966 1967 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1968 /* compute d_nnz for preallocation */ 1969 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1970 for (i=0; i<ai[ma]; i++) { 1971 d_nnz[aj[i]]++; 1972 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1973 } 1974 /* compute local off-diagonal contributions */ 1975 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1976 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1977 /* map those to global */ 1978 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1979 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1980 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1981 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1982 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1983 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1984 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1985 1986 ierr = 
MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1987 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1988 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1989 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1990 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1991 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1992 } else { 1993 B = *matout; 1994 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1995 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1996 } 1997 1998 /* copy over the A part */ 1999 array = Aloc->a; 2000 row = A->rmap->rstart; 2001 for (i=0; i<ma; i++) { 2002 ncol = ai[i+1]-ai[i]; 2003 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2004 row++; 2005 array += ncol; aj += ncol; 2006 } 2007 aj = Aloc->j; 2008 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2009 2010 /* copy over the B part */ 2011 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2012 array = Bloc->a; 2013 row = A->rmap->rstart; 2014 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2015 cols_tmp = cols; 2016 for (i=0; i<mb; i++) { 2017 ncol = bi[i+1]-bi[i]; 2018 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2019 row++; 2020 array += ncol; cols_tmp += ncol; 2021 } 2022 ierr = PetscFree(cols);CHKERRQ(ierr); 2023 2024 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2025 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2026 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2027 *matout = B; 2028 } else { 2029 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2030 } 2031 PetscFunctionReturn(0); 2032 } 2033 2034 #undef __FUNCT__ 2035 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2036 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2037 { 2038 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2039 Mat a = aij->A,b = aij->B; 2040 PetscErrorCode ierr; 2041 PetscInt s1,s2,s3; 2042 2043 PetscFunctionBegin; 2044 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2045 if (rr) { 2046 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2047 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2048 /* Overlap communication with computation. 
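       The scatter of rr into the ghost vector aij->lvec is only started here; the left scaling of the
       off-diagonal block and the scaling of the diagonal block proceed while the ghost values are in
       flight, and the scatter is completed below just before the off-diagonal block is right-scaled
       with lvec.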
*/ 2049 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2050 } 2051 if (ll) { 2052 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2053 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2054 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2055 } 2056 /* scale the diagonal block */ 2057 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2058 2059 if (rr) { 2060 /* Do a scatter end and then right scale the off-diagonal block */ 2061 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2062 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2063 } 2064 PetscFunctionReturn(0); 2065 } 2066 2067 #undef __FUNCT__ 2068 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2069 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2070 { 2071 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2072 PetscErrorCode ierr; 2073 2074 PetscFunctionBegin; 2075 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2076 PetscFunctionReturn(0); 2077 } 2078 2079 #undef __FUNCT__ 2080 #define __FUNCT__ "MatEqual_MPIAIJ" 2081 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2082 { 2083 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2084 Mat a,b,c,d; 2085 PetscBool flg; 2086 PetscErrorCode ierr; 2087 2088 PetscFunctionBegin; 2089 a = matA->A; b = matA->B; 2090 c = matB->A; d = matB->B; 2091 2092 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2093 if (flg) { 2094 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2095 } 2096 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2097 PetscFunctionReturn(0); 2098 } 2099 2100 #undef __FUNCT__ 2101 #define __FUNCT__ "MatCopy_MPIAIJ" 2102 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2103 { 2104 PetscErrorCode ierr; 2105 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2106 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2107 2108 PetscFunctionBegin; 2109 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2110 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2111 /* because of the column compression in the off-processor part of the matrix a->B, 2112 the number of columns in a->B and b->B may be different, hence we cannot call 2113 the MatCopy() directly on the two parts. If need be, we can provide a more 2114 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2115 then copying the submatrices */ 2116 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2117 } else { 2118 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2119 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2120 } 2121 PetscFunctionReturn(0); 2122 } 2123 2124 #undef __FUNCT__ 2125 #define __FUNCT__ "MatSetUp_MPIAIJ" 2126 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2127 { 2128 PetscErrorCode ierr; 2129 2130 PetscFunctionBegin; 2131 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2132 PetscFunctionReturn(0); 2133 } 2134 2135 /* 2136 Computes the number of nonzeros per row needed for preallocation when X and Y 2137 have different nonzero structure. 
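  The loop below merges the two sorted per-row column lists (compared in global numbering through
  xltog and yltog), counting columns that appear in both lists only once. For example, if row i of X
  has global columns {0,3,7} and row i of Y has global columns {3,5}, the union is {0,3,5,7} and
  nnz[i] = 4.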
2138 */ 2139 #undef __FUNCT__ 2140 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2141 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2142 { 2143 PetscInt i,j,k,nzx,nzy; 2144 2145 PetscFunctionBegin; 2146 /* Set the number of nonzeros in the new matrix */ 2147 for (i=0; i<m; i++) { 2148 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2149 nzx = xi[i+1] - xi[i]; 2150 nzy = yi[i+1] - yi[i]; 2151 nnz[i] = 0; 2152 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2153 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2154 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2155 nnz[i]++; 2156 } 2157 for (; k<nzy; k++) nnz[i]++; 2158 } 2159 PetscFunctionReturn(0); 2160 } 2161 2162 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2163 #undef __FUNCT__ 2164 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2165 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2166 { 2167 PetscErrorCode ierr; 2168 PetscInt m = Y->rmap->N; 2169 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2170 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2171 2172 PetscFunctionBegin; 2173 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2174 PetscFunctionReturn(0); 2175 } 2176 2177 #undef __FUNCT__ 2178 #define __FUNCT__ "MatAXPY_MPIAIJ" 2179 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2180 { 2181 PetscErrorCode ierr; 2182 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2183 PetscBLASInt bnz,one=1; 2184 Mat_SeqAIJ *x,*y; 2185 2186 PetscFunctionBegin; 2187 if (str == SAME_NONZERO_PATTERN) { 2188 PetscScalar alpha = a; 2189 x = (Mat_SeqAIJ*)xx->A->data; 2190 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2191 y = (Mat_SeqAIJ*)yy->A->data; 2192 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2193 x = (Mat_SeqAIJ*)xx->B->data; 2194 y = (Mat_SeqAIJ*)yy->B->data; 2195 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2196 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2197 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2198 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2199 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2200 } else { 2201 Mat B; 2202 PetscInt *nnz_d,*nnz_o; 2203 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2204 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2205 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2206 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2207 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2208 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2209 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2210 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2211 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2212 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2213 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2214 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2215 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2216 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2217 } 2218 
PetscFunctionReturn(0); 2219 } 2220 2221 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2222 2223 #undef __FUNCT__ 2224 #define __FUNCT__ "MatConjugate_MPIAIJ" 2225 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2226 { 2227 #if defined(PETSC_USE_COMPLEX) 2228 PetscErrorCode ierr; 2229 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2230 2231 PetscFunctionBegin; 2232 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2233 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2234 #else 2235 PetscFunctionBegin; 2236 #endif 2237 PetscFunctionReturn(0); 2238 } 2239 2240 #undef __FUNCT__ 2241 #define __FUNCT__ "MatRealPart_MPIAIJ" 2242 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2243 { 2244 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2245 PetscErrorCode ierr; 2246 2247 PetscFunctionBegin; 2248 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2249 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2250 PetscFunctionReturn(0); 2251 } 2252 2253 #undef __FUNCT__ 2254 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2255 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2256 { 2257 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2258 PetscErrorCode ierr; 2259 2260 PetscFunctionBegin; 2261 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2262 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2263 PetscFunctionReturn(0); 2264 } 2265 2266 #undef __FUNCT__ 2267 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2268 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2269 { 2270 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2271 PetscErrorCode ierr; 2272 PetscInt i,*idxb = 0; 2273 PetscScalar *va,*vb; 2274 Vec vtmp; 2275 2276 PetscFunctionBegin; 2277 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2278 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2279 if (idx) { 2280 for (i=0; i<A->rmap->n; i++) { 2281 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2282 } 2283 } 2284 2285 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2286 if (idx) { 2287 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2288 } 2289 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2290 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2291 2292 for (i=0; i<A->rmap->n; i++) { 2293 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2294 va[i] = vb[i]; 2295 if (idx) idx[i] = a->garray[idxb[i]]; 2296 } 2297 } 2298 2299 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2300 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2301 ierr = PetscFree(idxb);CHKERRQ(ierr); 2302 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2303 PetscFunctionReturn(0); 2304 } 2305 2306 #undef __FUNCT__ 2307 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2308 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2309 { 2310 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2311 PetscErrorCode ierr; 2312 PetscInt i,*idxb = 0; 2313 PetscScalar *va,*vb; 2314 Vec vtmp; 2315 2316 PetscFunctionBegin; 2317 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2318 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2319 if (idx) { 2320 for (i=0; i<A->cmap->n; i++) { 2321 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2322 } 2323 } 2324 2325 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2326 if (idx) { 2327 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2328 } 2329 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2330 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2331 2332 for (i=0; i<A->rmap->n; i++) { 2333 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2334 va[i] = vb[i]; 2335 if (idx) idx[i] = a->garray[idxb[i]]; 2336 } 2337 } 2338 2339 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2340 ierr = 
VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2341 ierr = PetscFree(idxb);CHKERRQ(ierr); 2342 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2343 PetscFunctionReturn(0); 2344 } 2345 2346 #undef __FUNCT__ 2347 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2348 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2349 { 2350 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2351 PetscInt n = A->rmap->n; 2352 PetscInt cstart = A->cmap->rstart; 2353 PetscInt *cmap = mat->garray; 2354 PetscInt *diagIdx, *offdiagIdx; 2355 Vec diagV, offdiagV; 2356 PetscScalar *a, *diagA, *offdiagA; 2357 PetscInt r; 2358 PetscErrorCode ierr; 2359 2360 PetscFunctionBegin; 2361 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2362 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2363 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2364 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2365 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2366 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2367 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2368 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2369 for (r = 0; r < n; ++r) { 2370 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2371 a[r] = diagA[r]; 2372 idx[r] = cstart + diagIdx[r]; 2373 } else { 2374 a[r] = offdiagA[r]; 2375 idx[r] = cmap[offdiagIdx[r]]; 2376 } 2377 } 2378 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2379 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2380 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2381 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2382 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2383 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2384 PetscFunctionReturn(0); 2385 } 2386 2387 #undef __FUNCT__ 2388 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2389 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2390 { 2391 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2392 PetscInt n = A->rmap->n; 2393 PetscInt cstart = A->cmap->rstart; 2394 PetscInt *cmap = mat->garray; 2395 PetscInt *diagIdx, *offdiagIdx; 2396 Vec diagV, offdiagV; 2397 PetscScalar *a, *diagA, *offdiagA; 2398 PetscInt r; 2399 PetscErrorCode ierr; 2400 2401 PetscFunctionBegin; 2402 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2403 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2404 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2405 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2406 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2407 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2408 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2409 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2410 for (r = 0; r < n; ++r) { 2411 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2412 a[r] = diagA[r]; 2413 idx[r] = cstart + diagIdx[r]; 2414 } else { 2415 a[r] = offdiagA[r]; 2416 idx[r] = cmap[offdiagIdx[r]]; 2417 } 2418 } 2419 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2420 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2421 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2422 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2423 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2424 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2425 PetscFunctionReturn(0); 2426 } 2427 2428 #undef __FUNCT__ 2429 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2430 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2431 { 2432 PetscErrorCode ierr; 
2433 Mat *dummy; 2434 2435 PetscFunctionBegin; 2436 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2437 *newmat = *dummy; 2438 ierr = PetscFree(dummy);CHKERRQ(ierr); 2439 PetscFunctionReturn(0); 2440 } 2441 2442 #undef __FUNCT__ 2443 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2444 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2445 { 2446 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2447 PetscErrorCode ierr; 2448 2449 PetscFunctionBegin; 2450 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2451 A->factorerrortype = a->A->factorerrortype; 2452 PetscFunctionReturn(0); 2453 } 2454 2455 #undef __FUNCT__ 2456 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2457 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2458 { 2459 PetscErrorCode ierr; 2460 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2461 2462 PetscFunctionBegin; 2463 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2464 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2465 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2466 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2467 PetscFunctionReturn(0); 2468 } 2469 2470 #undef __FUNCT__ 2471 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ" 2472 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2473 { 2474 PetscFunctionBegin; 2475 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2476 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2477 PetscFunctionReturn(0); 2478 } 2479 2480 #undef __FUNCT__ 2481 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap" 2482 /*@ 2483 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2484 2485 Collective on Mat 2486 2487 Input Parameters: 2488 + A - the matrix 2489 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2490 2491 Level: advanced 2492 2493 @*/ 2494 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2495 { 2496 PetscErrorCode ierr; 2497 2498 PetscFunctionBegin; 2499 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2500 PetscFunctionReturn(0); 2501 } 2502 2503 #undef __FUNCT__ 2504 #define __FUNCT__ "MatSetFromOptions_MPIAIJ" 2505 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2506 { 2507 PetscErrorCode ierr; 2508 PetscBool sc = PETSC_FALSE,flg; 2509 2510 PetscFunctionBegin; 2511 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2512 ierr = PetscObjectOptionsBegin((PetscObject)A); 2513 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2514 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2515 if (flg) { 2516 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2517 } 2518 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2519 PetscFunctionReturn(0); 2520 } 2521 2522 #undef __FUNCT__ 2523 #define __FUNCT__ "MatShift_MPIAIJ" 2524 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2525 { 2526 PetscErrorCode ierr; 2527 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2528 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2529 2530 PetscFunctionBegin; 2531 if (!Y->preallocated) { 2532 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2533 } else if 
(!aij->nz) { 2534 PetscInt nonew = aij->nonew; 2535 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2536 aij->nonew = nonew; 2537 } 2538 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2539 PetscFunctionReturn(0); 2540 } 2541 2542 #undef __FUNCT__ 2543 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ" 2544 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2545 { 2546 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2547 PetscErrorCode ierr; 2548 2549 PetscFunctionBegin; 2550 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2551 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2552 if (d) { 2553 PetscInt rstart; 2554 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2555 *d += rstart; 2556 2557 } 2558 PetscFunctionReturn(0); 2559 } 2560 2561 2562 /* -------------------------------------------------------------------*/ 2563 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2564 MatGetRow_MPIAIJ, 2565 MatRestoreRow_MPIAIJ, 2566 MatMult_MPIAIJ, 2567 /* 4*/ MatMultAdd_MPIAIJ, 2568 MatMultTranspose_MPIAIJ, 2569 MatMultTransposeAdd_MPIAIJ, 2570 0, 2571 0, 2572 0, 2573 /*10*/ 0, 2574 0, 2575 0, 2576 MatSOR_MPIAIJ, 2577 MatTranspose_MPIAIJ, 2578 /*15*/ MatGetInfo_MPIAIJ, 2579 MatEqual_MPIAIJ, 2580 MatGetDiagonal_MPIAIJ, 2581 MatDiagonalScale_MPIAIJ, 2582 MatNorm_MPIAIJ, 2583 /*20*/ MatAssemblyBegin_MPIAIJ, 2584 MatAssemblyEnd_MPIAIJ, 2585 MatSetOption_MPIAIJ, 2586 MatZeroEntries_MPIAIJ, 2587 /*24*/ MatZeroRows_MPIAIJ, 2588 0, 2589 0, 2590 0, 2591 0, 2592 /*29*/ MatSetUp_MPIAIJ, 2593 0, 2594 0, 2595 MatGetDiagonalBlock_MPIAIJ, 2596 0, 2597 /*34*/ MatDuplicate_MPIAIJ, 2598 0, 2599 0, 2600 0, 2601 0, 2602 /*39*/ MatAXPY_MPIAIJ, 2603 MatGetSubMatrices_MPIAIJ, 2604 MatIncreaseOverlap_MPIAIJ, 2605 MatGetValues_MPIAIJ, 2606 MatCopy_MPIAIJ, 2607 /*44*/ MatGetRowMax_MPIAIJ, 2608 MatScale_MPIAIJ, 2609 MatShift_MPIAIJ, 2610 MatDiagonalSet_MPIAIJ, 2611 MatZeroRowsColumns_MPIAIJ, 2612 /*49*/ MatSetRandom_MPIAIJ, 2613 0, 2614 0, 2615 0, 2616 0, 2617 /*54*/ MatFDColoringCreate_MPIXAIJ, 2618 0, 2619 MatSetUnfactored_MPIAIJ, 2620 MatPermute_MPIAIJ, 2621 0, 2622 /*59*/ MatGetSubMatrix_MPIAIJ, 2623 MatDestroy_MPIAIJ, 2624 MatView_MPIAIJ, 2625 0, 2626 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2627 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2628 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2629 0, 2630 0, 2631 0, 2632 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2633 MatGetRowMinAbs_MPIAIJ, 2634 0, 2635 0, 2636 0, 2637 0, 2638 /*75*/ MatFDColoringApply_AIJ, 2639 MatSetFromOptions_MPIAIJ, 2640 0, 2641 0, 2642 MatFindZeroDiagonals_MPIAIJ, 2643 /*80*/ 0, 2644 0, 2645 0, 2646 /*83*/ MatLoad_MPIAIJ, 2647 0, 2648 0, 2649 0, 2650 0, 2651 0, 2652 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2653 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2654 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2655 MatPtAP_MPIAIJ_MPIAIJ, 2656 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2657 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2658 0, 2659 0, 2660 0, 2661 0, 2662 /*99*/ 0, 2663 0, 2664 0, 2665 MatConjugate_MPIAIJ, 2666 0, 2667 /*104*/MatSetValuesRow_MPIAIJ, 2668 MatRealPart_MPIAIJ, 2669 MatImaginaryPart_MPIAIJ, 2670 0, 2671 0, 2672 /*109*/0, 2673 0, 2674 MatGetRowMin_MPIAIJ, 2675 0, 2676 MatMissingDiagonal_MPIAIJ, 2677 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2678 0, 2679 MatGetGhosts_MPIAIJ, 2680 0, 2681 0, 2682 /*119*/0, 2683 0, 2684 0, 2685 0, 2686 MatGetMultiProcBlock_MPIAIJ, 2687 /*124*/MatFindNonzeroRows_MPIAIJ, 2688 MatGetColumnNorms_MPIAIJ, 2689 MatInvertBlockDiagonal_MPIAIJ, 2690 0, 2691 
MatGetSubMatricesMPI_MPIAIJ, 2692 /*129*/0, 2693 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2694 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2695 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2696 0, 2697 /*134*/0, 2698 0, 2699 0, 2700 0, 2701 0, 2702 /*139*/0, 2703 0, 2704 0, 2705 MatFDColoringSetUp_MPIXAIJ, 2706 MatFindOffBlockDiagonalEntries_MPIAIJ, 2707 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2708 }; 2709 2710 /* ----------------------------------------------------------------------------------------*/ 2711 2712 #undef __FUNCT__ 2713 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2714 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2715 { 2716 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2717 PetscErrorCode ierr; 2718 2719 PetscFunctionBegin; 2720 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2721 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2722 PetscFunctionReturn(0); 2723 } 2724 2725 #undef __FUNCT__ 2726 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2727 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2728 { 2729 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2730 PetscErrorCode ierr; 2731 2732 PetscFunctionBegin; 2733 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2734 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2735 PetscFunctionReturn(0); 2736 } 2737 2738 #undef __FUNCT__ 2739 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2740 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2741 { 2742 Mat_MPIAIJ *b; 2743 PetscErrorCode ierr; 2744 2745 PetscFunctionBegin; 2746 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2747 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2748 b = (Mat_MPIAIJ*)B->data; 2749 2750 #if defined(PETSC_USE_CTABLE) 2751 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2752 #else 2753 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2754 #endif 2755 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2756 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2757 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2758 2759 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2760 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2761 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2762 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2763 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2764 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2765 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2766 2767 if (!B->preallocated) { 2768 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2769 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2770 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2771 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2772 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2773 } 2774 2775 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2776 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2777 B->preallocated = PETSC_TRUE; 2778 B->was_assembled = PETSC_FALSE; 2779 B->assembled = PETSC_FALSE;; 2780 PetscFunctionReturn(0); 2781 } 2782 2783 #undef __FUNCT__ 2784 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2785 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2786 { 2787 Mat mat; 2788 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2789 PetscErrorCode ierr; 2790 2791 PetscFunctionBegin; 2792 *newmat = 0; 2793 ierr = 
MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2794 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2795 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2796 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2797 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2798 a = (Mat_MPIAIJ*)mat->data; 2799 2800 mat->factortype = matin->factortype; 2801 mat->assembled = PETSC_TRUE; 2802 mat->insertmode = NOT_SET_VALUES; 2803 mat->preallocated = PETSC_TRUE; 2804 2805 a->size = oldmat->size; 2806 a->rank = oldmat->rank; 2807 a->donotstash = oldmat->donotstash; 2808 a->roworiented = oldmat->roworiented; 2809 a->rowindices = 0; 2810 a->rowvalues = 0; 2811 a->getrowactive = PETSC_FALSE; 2812 2813 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2814 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2815 2816 if (oldmat->colmap) { 2817 #if defined(PETSC_USE_CTABLE) 2818 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2819 #else 2820 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2821 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2822 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2823 #endif 2824 } else a->colmap = 0; 2825 if (oldmat->garray) { 2826 PetscInt len; 2827 len = oldmat->B->cmap->n; 2828 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2829 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2830 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2831 } else a->garray = 0; 2832 2833 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2834 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2835 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2836 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2837 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2838 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2839 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2840 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2841 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2842 *newmat = mat; 2843 PetscFunctionReturn(0); 2844 } 2845 2846 2847 2848 #undef __FUNCT__ 2849 #define __FUNCT__ "MatLoad_MPIAIJ" 2850 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2851 { 2852 PetscScalar *vals,*svals; 2853 MPI_Comm comm; 2854 PetscErrorCode ierr; 2855 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2856 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2857 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2858 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2859 PetscInt cend,cstart,n,*rowners; 2860 int fd; 2861 PetscInt bs = newMat->rmap->bs; 2862 2863 PetscFunctionBegin; 2864 /* force binary viewer to load .info file if it has not yet done so */ 2865 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2866 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2867 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2868 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2869 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2870 if (!rank) { 2871 ierr = 
PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2872 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2873 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2874 } 2875 2876 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2877 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2878 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2879 if (bs < 0) bs = 1; 2880 2881 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2882 M = header[1]; N = header[2]; 2883 2884 /* If global sizes are set, check if they are consistent with that given in the file */ 2885 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2886 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2887 2888 /* determine ownership of all (block) rows */ 2889 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2890 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2891 else m = newMat->rmap->n; /* Set by user */ 2892 2893 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2894 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2895 2896 /* First process needs enough room for process with most rows */ 2897 if (!rank) { 2898 mmax = rowners[1]; 2899 for (i=2; i<=size; i++) { 2900 mmax = PetscMax(mmax, rowners[i]); 2901 } 2902 } else mmax = -1; /* unused, but compilers complain */ 2903 2904 rowners[0] = 0; 2905 for (i=2; i<=size; i++) { 2906 rowners[i] += rowners[i-1]; 2907 } 2908 rstart = rowners[rank]; 2909 rend = rowners[rank+1]; 2910 2911 /* distribute row lengths to all processors */ 2912 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2913 if (!rank) { 2914 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2915 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2916 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2917 for (j=0; j<m; j++) { 2918 procsnz[0] += ourlens[j]; 2919 } 2920 for (i=1; i<size; i++) { 2921 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2922 /* calculate the number of nonzeros on each processor */ 2923 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2924 procsnz[i] += rowlengths[j]; 2925 } 2926 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2927 } 2928 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2929 } else { 2930 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2931 } 2932 2933 if (!rank) { 2934 /* determine max buffer needed and allocate it */ 2935 maxnz = 0; 2936 for (i=0; i<size; i++) { 2937 maxnz = PetscMax(maxnz,procsnz[i]); 2938 } 2939 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2940 2941 /* read in my part of the matrix column indices */ 2942 nz = procsnz[0]; 2943 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2944 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2945 2946 /* read in every one elses and ship off */ 2947 for (i=1; i<size; i++) { 
2948 nz = procsnz[i]; 2949 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2950 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2951 } 2952 ierr = PetscFree(cols);CHKERRQ(ierr); 2953 } else { 2954 /* determine buffer space needed for message */ 2955 nz = 0; 2956 for (i=0; i<m; i++) { 2957 nz += ourlens[i]; 2958 } 2959 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2960 2961 /* receive message of column indices*/ 2962 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2963 } 2964 2965 /* determine column ownership if matrix is not square */ 2966 if (N != M) { 2967 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2968 else n = newMat->cmap->n; 2969 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2970 cstart = cend - n; 2971 } else { 2972 cstart = rstart; 2973 cend = rend; 2974 n = cend - cstart; 2975 } 2976 2977 /* loop over local rows, determining number of off diagonal entries */ 2978 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2979 jj = 0; 2980 for (i=0; i<m; i++) { 2981 for (j=0; j<ourlens[i]; j++) { 2982 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2983 jj++; 2984 } 2985 } 2986 2987 for (i=0; i<m; i++) { 2988 ourlens[i] -= offlens[i]; 2989 } 2990 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2991 2992 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2993 2994 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2995 2996 for (i=0; i<m; i++) { 2997 ourlens[i] += offlens[i]; 2998 } 2999 3000 if (!rank) { 3001 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3002 3003 /* read in my part of the matrix numerical values */ 3004 nz = procsnz[0]; 3005 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3006 3007 /* insert into matrix */ 3008 jj = rstart; 3009 smycols = mycols; 3010 svals = vals; 3011 for (i=0; i<m; i++) { 3012 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3013 smycols += ourlens[i]; 3014 svals += ourlens[i]; 3015 jj++; 3016 } 3017 3018 /* read in other processors and ship out */ 3019 for (i=1; i<size; i++) { 3020 nz = procsnz[i]; 3021 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3022 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3023 } 3024 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3025 } else { 3026 /* receive numeric values */ 3027 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3028 3029 /* receive message of values*/ 3030 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3031 3032 /* insert into matrix */ 3033 jj = rstart; 3034 smycols = mycols; 3035 svals = vals; 3036 for (i=0; i<m; i++) { 3037 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3038 smycols += ourlens[i]; 3039 svals += ourlens[i]; 3040 jj++; 3041 } 3042 } 3043 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3044 ierr = PetscFree(vals);CHKERRQ(ierr); 3045 ierr = PetscFree(mycols);CHKERRQ(ierr); 3046 ierr = PetscFree(rowners);CHKERRQ(ierr); 3047 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3048 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3049 PetscFunctionReturn(0); 3050 } 3051 3052 #undef __FUNCT__ 3053 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3054 /* TODO: Not scalable because of ISAllGather() unless getting all columns. 
*/ 3055 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3056 { 3057 PetscErrorCode ierr; 3058 IS iscol_local; 3059 PetscInt csize; 3060 3061 PetscFunctionBegin; 3062 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3063 if (call == MAT_REUSE_MATRIX) { 3064 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3065 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3066 } else { 3067 /* check if we are grabbing all columns*/ 3068 PetscBool isstride; 3069 PetscMPIInt lisstride = 0,gisstride; 3070 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3071 if (isstride) { 3072 PetscInt start,len,mstart,mlen; 3073 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3074 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3075 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3076 if (mstart == start && mlen-mstart == len) lisstride = 1; 3077 } 3078 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3079 if (gisstride) { 3080 PetscInt N; 3081 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3082 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3083 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3084 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3085 } else { 3086 PetscInt cbs; 3087 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3088 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3089 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3090 } 3091 } 3092 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3093 if (call == MAT_INITIAL_MATRIX) { 3094 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3095 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3096 } 3097 PetscFunctionReturn(0); 3098 } 3099 3100 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3101 #undef __FUNCT__ 3102 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3103 /* 3104 Not great since it makes two copies of the submatrix, first an SeqAIJ 3105 in local and then by concatenating the local matrices the end result. 3106 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3107 3108 Note: This requires a sequential iscol with all indices. 
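   Such an iscol is what MatGetSubMatrix_MPIAIJ() above constructs before calling this routine;
   when not all columns are requested it gathers the distributed column IS onto every process,
   e.g. (sketch of that one step only, as done above)

      ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);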
3109 */ 3110 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3111 { 3112 PetscErrorCode ierr; 3113 PetscMPIInt rank,size; 3114 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3115 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3116 PetscBool allcolumns, colflag; 3117 Mat M,Mreuse; 3118 MatScalar *vwork,*aa; 3119 MPI_Comm comm; 3120 Mat_SeqAIJ *aij; 3121 3122 PetscFunctionBegin; 3123 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3124 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3125 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3126 3127 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3128 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3129 if (colflag && ncol == mat->cmap->N) { 3130 allcolumns = PETSC_TRUE; 3131 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3132 } else { 3133 allcolumns = PETSC_FALSE; 3134 } 3135 if (call == MAT_REUSE_MATRIX) { 3136 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3137 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3138 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3139 } else { 3140 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3141 } 3142 3143 /* 3144 m - number of local rows 3145 n - number of columns (same on all processors) 3146 rstart - first row in new global matrix generated 3147 */ 3148 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3149 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3150 if (call == MAT_INITIAL_MATRIX) { 3151 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3152 ii = aij->i; 3153 jj = aij->j; 3154 3155 /* 3156 Determine the number of non-zeros in the diagonal and off-diagonal 3157 portions of the matrix in order to do correct preallocation 3158 */ 3159 3160 /* first get start and end of "diagonal" columns */ 3161 if (csize == PETSC_DECIDE) { 3162 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3163 if (mglobal == n) { /* square matrix */ 3164 nlocal = m; 3165 } else { 3166 nlocal = n/size + ((n % size) > rank); 3167 } 3168 } else { 3169 nlocal = csize; 3170 } 3171 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3172 rstart = rend - nlocal; 3173 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3174 3175 /* next, compute all the lengths */ 3176 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3177 olens = dlens + m; 3178 for (i=0; i<m; i++) { 3179 jend = ii[i+1] - ii[i]; 3180 olen = 0; 3181 dlen = 0; 3182 for (j=0; j<jend; j++) { 3183 if (*jj < rstart || *jj >= rend) olen++; 3184 else dlen++; 3185 jj++; 3186 } 3187 olens[i] = olen; 3188 dlens[i] = dlen; 3189 } 3190 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3191 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3192 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3193 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3194 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3195 ierr = PetscFree(dlens);CHKERRQ(ierr); 3196 } else { 3197 PetscInt ml,nl; 3198 3199 M = *newmat; 3200 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3201 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same 
size/layout as request"); 3202 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3203 /* 3204 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3205 rather than the slower MatSetValues(). 3206 */ 3207 M->was_assembled = PETSC_TRUE; 3208 M->assembled = PETSC_FALSE; 3209 } 3210 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3211 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3212 ii = aij->i; 3213 jj = aij->j; 3214 aa = aij->a; 3215 for (i=0; i<m; i++) { 3216 row = rstart + i; 3217 nz = ii[i+1] - ii[i]; 3218 cwork = jj; jj += nz; 3219 vwork = aa; aa += nz; 3220 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3221 } 3222 3223 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3224 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3225 *newmat = M; 3226 3227 /* save submatrix used in processor for next request */ 3228 if (call == MAT_INITIAL_MATRIX) { 3229 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3230 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3231 } 3232 PetscFunctionReturn(0); 3233 } 3234 3235 #undef __FUNCT__ 3236 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3237 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3238 { 3239 PetscInt m,cstart, cend,j,nnz,i,d; 3240 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3241 const PetscInt *JJ; 3242 PetscScalar *values; 3243 PetscErrorCode ierr; 3244 3245 PetscFunctionBegin; 3246 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3247 3248 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3249 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3250 m = B->rmap->n; 3251 cstart = B->cmap->rstart; 3252 cend = B->cmap->rend; 3253 rstart = B->rmap->rstart; 3254 3255 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3256 3257 #if defined(PETSC_USE_DEBUG) 3258 for (i=0; i<m; i++) { 3259 nnz = Ii[i+1]- Ii[i]; 3260 JJ = J + Ii[i]; 3261 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3262 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3263 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3264 } 3265 #endif 3266 3267 for (i=0; i<m; i++) { 3268 nnz = Ii[i+1]- Ii[i]; 3269 JJ = J + Ii[i]; 3270 nnz_max = PetscMax(nnz_max,nnz); 3271 d = 0; 3272 for (j=0; j<nnz; j++) { 3273 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3274 } 3275 d_nnz[i] = d; 3276 o_nnz[i] = nnz - d; 3277 } 3278 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3279 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3280 3281 if (v) values = (PetscScalar*)v; 3282 else { 3283 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3284 } 3285 3286 for (i=0; i<m; i++) { 3287 ii = i + rstart; 3288 nnz = Ii[i+1]- Ii[i]; 3289 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3290 } 3291 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3292 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3293 3294 if (!v) { 3295 ierr = PetscFree(values);CHKERRQ(ierr); 3296 } 3297 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3298 PetscFunctionReturn(0); 3299 } 3300 3301 #undef __FUNCT__ 3302 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3303 /*@ 3304 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3305 (the default parallel PETSc format). 3306 3307 Collective on MPI_Comm 3308 3309 Input Parameters: 3310 + B - the matrix 3311 . i - the indices into j for the start of each local row (starts with zero) 3312 . j - the column indices for each local row (starts with zero) 3313 - v - optional values in the matrix 3314 3315 Level: developer 3316 3317 Notes: 3318 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3319 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3320 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3321 3322 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3323 3324 The format which is used for the sparse matrix input, is equivalent to a 3325 row-major ordering.. i.e for the following matrix, the input data expected is 3326 as shown 3327 3328 $ 1 0 0 3329 $ 2 0 3 P0 3330 $ ------- 3331 $ 4 5 6 P1 3332 $ 3333 $ Process0 [P0]: rows_owned=[0,1] 3334 $ i = {0,1,3} [size = nrow+1 = 2+1] 3335 $ j = {0,0,2} [size = 3] 3336 $ v = {1,2,3} [size = 3] 3337 $ 3338 $ Process1 [P1]: rows_owned=[2] 3339 $ i = {0,3} [size = nrow+1 = 1+1] 3340 $ j = {0,1,2} [size = 3] 3341 $ v = {4,5,6} [size = 3] 3342 3343 .keywords: matrix, aij, compressed row, sparse, parallel 3344 3345 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3346 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3347 @*/ 3348 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3349 { 3350 PetscErrorCode ierr; 3351 3352 PetscFunctionBegin; 3353 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3354 PetscFunctionReturn(0); 3355 } 3356 3357 #undef __FUNCT__ 3358 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3359 /*@C 3360 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3361 (the default parallel PETSc format). For good matrix assembly performance 3362 the user should preallocate the matrix storage by setting the parameters 3363 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3364 performance can be increased by more than a factor of 50. 3365 3366 Collective on MPI_Comm 3367 3368 Input Parameters: 3369 + B - the matrix 3370 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3371 (same value is used for all local rows) 3372 . d_nnz - array containing the number of nonzeros in the various rows of the 3373 DIAGONAL portion of the local submatrix (possibly different for each row) 3374 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3375 The size of this array is equal to the number of local rows, i.e 'm'. 
3376 For matrices that will be factored, you must leave room for (and set) 3377 the diagonal entry even if it is zero. 3378 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3379 submatrix (same value is used for all local rows). 3380 - o_nnz - array containing the number of nonzeros in the various rows of the 3381 OFF-DIAGONAL portion of the local submatrix (possibly different for 3382 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3383 structure. The size of this array is equal to the number 3384 of local rows, i.e 'm'. 3385 3386 If the *_nnz parameter is given then the *_nz parameter is ignored. 3387 3388 The AIJ format (also called the Yale sparse matrix format or 3389 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3390 storage. The stored row and column indices begin with zero. 3391 See Users-Manual: ch_mat for details. 3392 3393 The parallel matrix is partitioned such that the first m0 rows belong to 3394 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3395 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3396 3397 The DIAGONAL portion of the local submatrix of a processor can be defined 3398 as the submatrix which is obtained by extracting the part corresponding to 3399 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3400 first row that belongs to the processor, r2 is the last row belonging to 3401 this processor, and c1-c2 is the range of indices of the local part of a 3402 vector suitable for applying the matrix to. This is an mxn matrix. In the 3403 common case of a square matrix, the row and column ranges are the same and 3404 the DIAGONAL part is also square. The remaining portion of the local 3405 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3406 3407 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3408 3409 You can call MatGetInfo() to get information on how effective the preallocation was; 3410 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3411 You can also run with the option -info and look for messages with the string 3412 malloc in them to see if additional memory allocation was needed. 3413 3414 Example usage: 3415 3416 Consider the following 8x8 matrix with 34 non-zero values, that is 3417 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3418 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3419 as follows: 3420 3421 .vb 3422 1 2 0 | 0 3 0 | 0 4 3423 Proc0 0 5 6 | 7 0 0 | 8 0 3424 9 0 10 | 11 0 0 | 12 0 3425 ------------------------------------- 3426 13 0 14 | 15 16 17 | 0 0 3427 Proc1 0 18 0 | 19 20 21 | 0 0 3428 0 0 0 | 22 23 0 | 24 0 3429 ------------------------------------- 3430 Proc2 25 26 27 | 0 0 28 | 29 0 3431 30 0 0 | 31 32 33 | 0 34 3432 .ve 3433 3434 This can be represented as a collection of submatrices as: 3435 3436 .vb 3437 A B C 3438 D E F 3439 G H I 3440 .ve 3441 3442 Where the submatrices A,B,C are owned by proc0, D,E,F are 3443 owned by proc1, G,H,I are owned by proc2. 3444 3445 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3446 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3447 The 'M','N' parameters are 8,8, and have the same values on all procs. 3448 3449 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3450 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3451 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3452 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3453 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3454 matrix, and [DF] as another SeqAIJ matrix. 3455 3456 When d_nz, o_nz parameters are specified, d_nz storage elements are 3457 allocated for every row of the local diagonal submatrix, and o_nz 3458 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3459 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3460 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3461 In this case, the values of d_nz,o_nz are: 3462 .vb 3463 proc0 : d_nz = 2, o_nz = 2 3464 proc1 : d_nz = 3, o_nz = 2 3465 proc2 : d_nz = 1, o_nz = 4 3466 .ve 3467 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3468 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3469 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3470 34 values. 3471 3472 When d_nnz, o_nnz parameters are specified, the storage is specified 3473 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3474 In the above case the values for d_nnz,o_nnz are: 3475 .vb 3476 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3477 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3478 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3479 .ve 3480 Here the space allocated is the sum of all the above values, i.e. 34, and 3481 hence pre-allocation is perfect. 3482 3483 Level: intermediate 3484 3485 .keywords: matrix, aij, compressed row, sparse, parallel 3486 3487 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3488 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 3489 @*/ 3490 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3491 { 3492 PetscErrorCode ierr; 3493 3494 PetscFunctionBegin; 3495 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3496 PetscValidType(B,1); 3497 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3498 PetscFunctionReturn(0); 3499 } 3500 3501 #undef __FUNCT__ 3502 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3503 /*@ 3504 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 3505 CSR format. 3506 3507 Collective on MPI_Comm 3508 3509 Input Parameters: 3510 + comm - MPI communicator 3511 . m - number of local rows (Cannot be PETSC_DECIDE) 3512 . n - This value should be the same as the local size used in creating the 3513 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3514 calculated if N is given) For square matrices n is almost always m. 3515 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3516 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3517 . i - row indices 3518 . j - column indices 3519 - a - matrix values 3520 3521 Output Parameter: 3522 . mat - the matrix 3523 3524 Level: intermediate 3525 3526 Notes: 3527 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3528 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3529 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3530 3531 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3532 3533 The format which is used for the sparse matrix input, is equivalent to a 3534 row-major ordering.. i.e for the following matrix, the input data expected is 3535 as shown 3536 3537 $ 1 0 0 3538 $ 2 0 3 P0 3539 $ ------- 3540 $ 4 5 6 P1 3541 $ 3542 $ Process0 [P0]: rows_owned=[0,1] 3543 $ i = {0,1,3} [size = nrow+1 = 2+1] 3544 $ j = {0,0,2} [size = 3] 3545 $ v = {1,2,3} [size = 3] 3546 $ 3547 $ Process1 [P1]: rows_owned=[2] 3548 $ i = {0,3} [size = nrow+1 = 1+1] 3549 $ j = {0,1,2} [size = 3] 3550 $ v = {4,5,6} [size = 3] 3551 3552 .keywords: matrix, aij, compressed row, sparse, parallel 3553 3554 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3555 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3556 @*/ 3557 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3558 { 3559 PetscErrorCode ierr; 3560 3561 PetscFunctionBegin; 3562 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3563 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3564 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3565 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3566 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3567 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3568 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3569 PetscFunctionReturn(0); 3570 } 3571 3572 #undef __FUNCT__ 3573 #define __FUNCT__ "MatCreateAIJ" 3574 /*@C 3575 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3576 (the default parallel PETSc format). For good matrix assembly performance 3577 the user should preallocate the matrix storage by setting the parameters 3578 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3579 performance can be increased by more than a factor of 50. 3580 3581 Collective on MPI_Comm 3582 3583 Input Parameters: 3584 + comm - MPI communicator 3585 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3586 This value should be the same as the local size used in creating the 3587 y vector for the matrix-vector product y = Ax. 3588 . n - This value should be the same as the local size used in creating the 3589 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3590 calculated if N is given) For square matrices n is almost always m. 3591 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3592 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3593 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3594 (same value is used for all local rows) 3595 . d_nnz - array containing the number of nonzeros in the various rows of the 3596 DIAGONAL portion of the local submatrix (possibly different for each row) 3597 or NULL, if d_nz is used to specify the nonzero structure. 3598 The size of this array is equal to the number of local rows, i.e 'm'. 3599 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3600 submatrix (same value is used for all local rows). 3601 - o_nnz - array containing the number of nonzeros in the various rows of the 3602 OFF-DIAGONAL portion of the local submatrix (possibly different for 3603 each row) or NULL, if o_nz is used to specify the nonzero 3604 structure. 
The size of this array is equal to the number 3605 of local rows, i.e 'm'. 3606 3607 Output Parameter: 3608 . A - the matrix 3609 3610 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3611 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3612 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3613 3614 Notes: 3615 If the *_nnz parameter is given then the *_nz parameter is ignored 3616 3617 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3618 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3619 storage requirements for this matrix. 3620 3621 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3622 processor then it must be used on all processors that share the object for 3623 that argument. 3624 3625 The user MUST specify either the local or global matrix dimensions 3626 (possibly both). 3627 3628 The parallel matrix is partitioned across processors such that the 3629 first m0 rows belong to process 0, the next m1 rows belong to 3630 process 1, the next m2 rows belong to process 2 etc.. where 3631 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3632 values corresponding to [m x N] submatrix. 3633 3634 The columns are logically partitioned with the n0 columns belonging 3635 to 0th partition, the next n1 columns belonging to the next 3636 partition etc.. where n0,n1,n2... are the input parameter 'n'. 3637 3638 The DIAGONAL portion of the local submatrix on any given processor 3639 is the submatrix corresponding to the rows and columns m,n 3640 corresponding to the given processor. i.e diagonal matrix on 3641 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3642 etc. The remaining portion of the local submatrix [m x (N-n)] 3643 constitutes the OFF-DIAGONAL portion. The example below better 3644 illustrates this concept. 3645 3646 For a square global matrix we define each processor's diagonal portion 3647 to be its local rows and the corresponding columns (a square submatrix); 3648 each processor's off-diagonal portion encompasses the remainder of the 3649 local matrix (a rectangular submatrix). 3650 3651 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3652 3653 When calling this routine with a single process communicator, a matrix of 3654 type MATSEQAIJ is returned. If a matrix of type MATMPIAIJ is desired for this 3655 type of communicator, use the construction mechanism: 3656 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3657 3658 By default, this format uses inodes (identical nodes) when possible. 3659 We search for consecutive rows with the same nonzero structure, thereby 3660 reusing matrix information to achieve increased efficiency. 3661 3662 Options Database Keys: 3663 + -mat_no_inode - Do not use inodes 3664 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3665 - -mat_aij_oneindex - Internally use indexing starting at 1 3666 rather than 0. Note that when calling MatSetValues(), 3667 the user still MUST index entries starting at 0! 3668 3669 3670 Example usage: 3671 3672 Consider the following 8x8 matrix with 34 non-zero values, that is 3673 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3674 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 3675 as follows: 3676 3677 .vb 3678 1 2 0 | 0 3 0 | 0 4 3679 Proc0 0 5 6 | 7 0 0 | 8 0 3680 9 0 10 | 11 0 0 | 12 0 3681 ------------------------------------- 3682 13 0 14 | 15 16 17 | 0 0 3683 Proc1 0 18 0 | 19 20 21 | 0 0 3684 0 0 0 | 22 23 0 | 24 0 3685 ------------------------------------- 3686 Proc2 25 26 27 | 0 0 28 | 29 0 3687 30 0 0 | 31 32 33 | 0 34 3688 .ve 3689 3690 This can be represented as a collection of submatrices as: 3691 3692 .vb 3693 A B C 3694 D E F 3695 G H I 3696 .ve 3697 3698 Where the submatrices A,B,C are owned by proc0, D,E,F are 3699 owned by proc1, G,H,I are owned by proc2. 3700 3701 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3702 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3703 The 'M','N' parameters are 8,8, and have the same values on all procs. 3704 3705 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3706 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3707 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3708 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3709 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3710 matrix, and [DF] as another SeqAIJ matrix. 3711 3712 When d_nz, o_nz parameters are specified, d_nz storage elements are 3713 allocated for every row of the local diagonal submatrix, and o_nz 3714 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3715 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3716 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3717 In this case, the values of d_nz,o_nz are: 3718 .vb 3719 proc0 : d_nz = 2, o_nz = 2 3720 proc1 : d_nz = 3, o_nz = 2 3721 proc2 : d_nz = 1, o_nz = 4 3722 .ve 3723 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3724 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3725 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3726 34 values. 3727 3728 When d_nnz, o_nnz parameters are specified, the storage is specified 3729 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3730 In the above case the values for d_nnz,o_nnz are: 3731 .vb 3732 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3733 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3734 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3735 .ve 3736 Here the space allocated is the sum of all the above values, i.e. 34, and 3737 hence pre-allocation is perfect.
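   A minimal sketch of the corresponding call (shown for proc0; the other processes pass their
   own local sizes and the d_nnz/o_nnz values listed above; the variable names are illustrative only):
.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};    /* proc0 values from the tables above */
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve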
3738 3739 Level: intermediate 3740 3741 .keywords: matrix, aij, compressed row, sparse, parallel 3742 3743 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3744 MATMPIAIJ, MatCreateMPIAIJWithArrays() 3745 @*/ 3746 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3747 { 3748 PetscErrorCode ierr; 3749 PetscMPIInt size; 3750 3751 PetscFunctionBegin; 3752 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3753 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3754 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3755 if (size > 1) { 3756 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3757 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3758 } else { 3759 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3760 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3761 } 3762 PetscFunctionReturn(0); 3763 } 3764 3765 #undef __FUNCT__ 3766 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3767 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3768 { 3769 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3770 PetscBool flg; 3771 PetscErrorCode ierr; 3772 3773 PetscFunctionBegin; 3774 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 3775 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 3776 if (Ad) *Ad = a->A; 3777 if (Ao) *Ao = a->B; 3778 if (colmap) *colmap = a->garray; 3779 PetscFunctionReturn(0); 3780 } 3781 3782 #undef __FUNCT__ 3783 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3784 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3785 { 3786 PetscErrorCode ierr; 3787 PetscInt m,N,i,rstart,nnz,Ii; 3788 PetscInt *indx; 3789 PetscScalar *values; 3790 3791 PetscFunctionBegin; 3792 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3793 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3794 PetscInt *dnz,*onz,sum,bs,cbs; 3795 3796 if (n == PETSC_DECIDE) { 3797 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3798 } 3799 /* Check sum(n) = N */ 3800 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3801 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3802 3803 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3804 rstart -= m; 3805 3806 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3807 for (i=0; i<m; i++) { 3808 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3809 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3810 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3811 } 3812 3813 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3814 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3815 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3816 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3817 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3818 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3819 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3820 } 3821 3822 /* numeric phase */ 3823 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3824 for (i=0; i<m; i++) { 3825 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3826 Ii = i + rstart; 3827 
ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3828 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3829 } 3830 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3831 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3832 PetscFunctionReturn(0); 3833 } 3834 3835 #undef __FUNCT__ 3836 #define __FUNCT__ "MatFileSplit" 3837 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3838 { 3839 PetscErrorCode ierr; 3840 PetscMPIInt rank; 3841 PetscInt m,N,i,rstart,nnz; 3842 size_t len; 3843 const PetscInt *indx; 3844 PetscViewer out; 3845 char *name; 3846 Mat B; 3847 const PetscScalar *values; 3848 3849 PetscFunctionBegin; 3850 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3851 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3852 /* Should this be the type of the diagonal block of A? */ 3853 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3854 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3855 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3856 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3857 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3858 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3859 for (i=0; i<m; i++) { 3860 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3861 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3862 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3863 } 3864 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3865 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3866 3867 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3868 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3869 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3870 sprintf(name,"%s.%d",outfile,rank); 3871 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3872 ierr = PetscFree(name);CHKERRQ(ierr); 3873 ierr = MatView(B,out);CHKERRQ(ierr); 3874 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3875 ierr = MatDestroy(&B);CHKERRQ(ierr); 3876 PetscFunctionReturn(0); 3877 } 3878 3879 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3880 #undef __FUNCT__ 3881 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3882 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3883 { 3884 PetscErrorCode ierr; 3885 Mat_Merge_SeqsToMPI *merge; 3886 PetscContainer container; 3887 3888 PetscFunctionBegin; 3889 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3890 if (container) { 3891 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3892 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3893 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3894 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3895 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3896 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3897 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3898 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3899 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3900 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3901 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3902 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3903 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3904 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3905 ierr = PetscFree(merge);CHKERRQ(ierr); 3906 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3907 } 3908 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3909 PetscFunctionReturn(0); 3910 } 3911 3912 
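/*
   Hypothetical usage sketch for MatFileSplit() defined above (the base name "Amat" is made up):
   each rank appends its local rows, repackaged as a SeqAIJ matrix, to its own binary file
   "Amat.<rank>", which can later be read back on PETSC_COMM_SELF with MatLoad().

     char outfile[] = "Amat";
     ierr = MatFileSplit(A,outfile);CHKERRQ(ierr);
*/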
#include <../src/mat/utils/freespace.h> 3913 #include <petscbt.h> 3914 3915 #undef __FUNCT__ 3916 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3917 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3918 { 3919 PetscErrorCode ierr; 3920 MPI_Comm comm; 3921 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3922 PetscMPIInt size,rank,taga,*len_s; 3923 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3924 PetscInt proc,m; 3925 PetscInt **buf_ri,**buf_rj; 3926 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3927 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3928 MPI_Request *s_waits,*r_waits; 3929 MPI_Status *status; 3930 MatScalar *aa=a->a; 3931 MatScalar **abuf_r,*ba_i; 3932 Mat_Merge_SeqsToMPI *merge; 3933 PetscContainer container; 3934 3935 PetscFunctionBegin; 3936 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3937 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3938 3939 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3940 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3941 3942 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3943 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3944 3945 bi = merge->bi; 3946 bj = merge->bj; 3947 buf_ri = merge->buf_ri; 3948 buf_rj = merge->buf_rj; 3949 3950 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 3951 owners = merge->rowmap->range; 3952 len_s = merge->len_s; 3953 3954 /* send and recv matrix values */ 3955 /*-----------------------------*/ 3956 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3957 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3958 3959 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 3960 for (proc=0,k=0; proc<size; proc++) { 3961 if (!len_s[proc]) continue; 3962 i = owners[proc]; 3963 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3964 k++; 3965 } 3966 3967 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3968 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3969 ierr = PetscFree(status);CHKERRQ(ierr); 3970 3971 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3972 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3973 3974 /* insert mat values of mpimat */ 3975 /*----------------------------*/ 3976 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 3977 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 3978 3979 for (k=0; k<merge->nrecv; k++) { 3980 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3981 nrows = *(buf_ri_k[k]); 3982 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3983 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 3984 } 3985 3986 /* set values of ba */ 3987 m = merge->rowmap->n; 3988 for (i=0; i<m; i++) { 3989 arow = owners[rank] + i; 3990 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3991 bnzi = bi[i+1] - bi[i]; 3992 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 3993 3994 /* add local non-zero vals of this proc's seqmat into ba */ 3995 anzi = ai[arow+1] - ai[arow]; 3996 aj = a->j + ai[arow]; 3997 aa = a->a + ai[arow]; 3998 nextaj = 0; 3999 for (j=0; nextaj<anzi; j++) { 4000 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4001 ba_i[j] += aa[nextaj++]; 4002 } 4003 } 4004 4005 /* add received 
vals into ba */ 4006 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4007 /* i-th row */ 4008 if (i == *nextrow[k]) { 4009 anzi = *(nextai[k]+1) - *nextai[k]; 4010 aj = buf_rj[k] + *(nextai[k]); 4011 aa = abuf_r[k] + *(nextai[k]); 4012 nextaj = 0; 4013 for (j=0; nextaj<anzi; j++) { 4014 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4015 ba_i[j] += aa[nextaj++]; 4016 } 4017 } 4018 nextrow[k]++; nextai[k]++; 4019 } 4020 } 4021 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4022 } 4023 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4024 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4025 4026 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4027 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4028 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4029 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4030 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4031 PetscFunctionReturn(0); 4032 } 4033 4034 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4035 4036 #undef __FUNCT__ 4037 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4038 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4039 { 4040 PetscErrorCode ierr; 4041 Mat B_mpi; 4042 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4043 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4044 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4045 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4046 PetscInt len,proc,*dnz,*onz,bs,cbs; 4047 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4048 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4049 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4050 MPI_Status *status; 4051 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4052 PetscBT lnkbt; 4053 Mat_Merge_SeqsToMPI *merge; 4054 PetscContainer container; 4055 4056 PetscFunctionBegin; 4057 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4058 4059 /* make sure it is a PETSc comm */ 4060 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4061 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4062 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4063 4064 ierr = PetscNew(&merge);CHKERRQ(ierr); 4065 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4066 4067 /* determine row ownership */ 4068 /*---------------------------------------------------------*/ 4069 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4070 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4071 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4072 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4073 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4074 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4075 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4076 4077 m = merge->rowmap->n; 4078 owners = merge->rowmap->range; 4079 4080 /* determine the number of messages to send, their lengths */ 4081 /*---------------------------------------------------------*/ 4082 len_s = merge->len_s; 4083 4084 len = 0; /* length of buf_si[] */ 4085 merge->nsend = 0; 4086 for (proc=0; proc<size; proc++) { 4087 len_si[proc] = 0; 4088 if (proc == rank) { 4089 len_s[proc] = 0; 4090 } else { 4091 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4092 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4093 } 4094 if (len_s[proc]) { 4095 merge->nsend++; 4096 nrows = 0; 4097 for 
(i=owners[proc]; i<owners[proc+1]; i++) { 4098 if (ai[i+1] > ai[i]) nrows++; 4099 } 4100 len_si[proc] = 2*(nrows+1); 4101 len += len_si[proc]; 4102 } 4103 } 4104 4105 /* determine the number and length of messages to receive for ij-structure */ 4106 /*-------------------------------------------------------------------------*/ 4107 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4108 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4109 4110 /* post the Irecv of j-structure */ 4111 /*-------------------------------*/ 4112 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4113 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4114 4115 /* post the Isend of j-structure */ 4116 /*--------------------------------*/ 4117 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4118 4119 for (proc=0, k=0; proc<size; proc++) { 4120 if (!len_s[proc]) continue; 4121 i = owners[proc]; 4122 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4123 k++; 4124 } 4125 4126 /* receives and sends of j-structure are complete */ 4127 /*------------------------------------------------*/ 4128 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4129 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4130 4131 /* send and recv i-structure */ 4132 /*---------------------------*/ 4133 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4134 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4135 4136 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4137 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4138 for (proc=0,k=0; proc<size; proc++) { 4139 if (!len_s[proc]) continue; 4140 /* form outgoing message for i-structure: 4141 buf_si[0]: nrows to be sent 4142 [1:nrows]: row index (global) 4143 [nrows+1:2*nrows+1]: i-structure index 4144 */ 4145 /*-------------------------------------------*/ 4146 nrows = len_si[proc]/2 - 1; 4147 buf_si_i = buf_si + nrows+1; 4148 buf_si[0] = nrows; 4149 buf_si_i[0] = 0; 4150 nrows = 0; 4151 for (i=owners[proc]; i<owners[proc+1]; i++) { 4152 anzi = ai[i+1] - ai[i]; 4153 if (anzi) { 4154 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4155 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4156 nrows++; 4157 } 4158 } 4159 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4160 k++; 4161 buf_si += len_si[proc]; 4162 } 4163 4164 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4165 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4166 4167 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4168 for (i=0; i<merge->nrecv; i++) { 4169 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4170 } 4171 4172 ierr = PetscFree(len_si);CHKERRQ(ierr); 4173 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4174 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4175 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4176 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4177 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4178 ierr = PetscFree(status);CHKERRQ(ierr); 4179 4180 /* compute a local seq matrix in each processor */ 4181 
/*----------------------------------------------*/ 4182 /* allocate bi array and free space for accumulating nonzero column info */ 4183 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4184 bi[0] = 0; 4185 4186 /* create and initialize a linked list */ 4187 nlnk = N+1; 4188 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4189 4190 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4191 len = ai[owners[rank+1]] - ai[owners[rank]]; 4192 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4193 4194 current_space = free_space; 4195 4196 /* determine symbolic info for each local row */ 4197 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4198 4199 for (k=0; k<merge->nrecv; k++) { 4200 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4201 nrows = *buf_ri_k[k]; 4202 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4203 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4204 } 4205 4206 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4207 len = 0; 4208 for (i=0; i<m; i++) { 4209 bnzi = 0; 4210 /* add local non-zero cols of this proc's seqmat into lnk */ 4211 arow = owners[rank] + i; 4212 anzi = ai[arow+1] - ai[arow]; 4213 aj = a->j + ai[arow]; 4214 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4215 bnzi += nlnk; 4216 /* add received col data into lnk */ 4217 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4218 if (i == *nextrow[k]) { /* i-th row */ 4219 anzi = *(nextai[k]+1) - *nextai[k]; 4220 aj = buf_rj[k] + *nextai[k]; 4221 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4222 bnzi += nlnk; 4223 nextrow[k]++; nextai[k]++; 4224 } 4225 } 4226 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4227 4228 /* if free space is not available, make more free space */ 4229 if (current_space->local_remaining<bnzi) { 4230 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4231 nspacedouble++; 4232 } 4233 /* copy data into free space, then initialize lnk */ 4234 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4235 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4236 4237 current_space->array += bnzi; 4238 current_space->local_used += bnzi; 4239 current_space->local_remaining -= bnzi; 4240 4241 bi[i+1] = bi[i] + bnzi; 4242 } 4243 4244 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4245 4246 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4247 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4248 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4249 4250 /* create symbolic parallel matrix B_mpi */ 4251 /*---------------------------------------*/ 4252 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4253 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4254 if (n==PETSC_DECIDE) { 4255 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4256 } else { 4257 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4258 } 4259 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4260 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4261 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4262 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4263 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4264 4265 /* B_mpi is not ready 
for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4266 B_mpi->assembled = PETSC_FALSE; 4267 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4268 merge->bi = bi; 4269 merge->bj = bj; 4270 merge->buf_ri = buf_ri; 4271 merge->buf_rj = buf_rj; 4272 merge->coi = NULL; 4273 merge->coj = NULL; 4274 merge->owners_co = NULL; 4275 4276 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4277 4278 /* attach the supporting struct to B_mpi for reuse */ 4279 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4280 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4281 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4282 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4283 *mpimat = B_mpi; 4284 4285 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4286 PetscFunctionReturn(0); 4287 } 4288 4289 #undef __FUNCT__ 4290 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4291 /*@C 4292 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4293 matrices from each processor 4294 4295 Collective on MPI_Comm 4296 4297 Input Parameters: 4298 + comm - the communicator the parallel matrix will live on 4299 . seqmat - the input sequential matrix (one per process) 4300 . m - number of local rows (or PETSC_DECIDE) 4301 . n - number of local columns (or PETSC_DECIDE) 4302 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4303 4304 Output Parameter: 4305 . mpimat - the parallel matrix generated 4306 4307 Level: advanced 4308 4309 Notes: 4310 The dimensions of the sequential matrix in each processor MUST be the same. 4311 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4312 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4313 @*/ 4314 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4315 { 4316 PetscErrorCode ierr; 4317 PetscMPIInt size; 4318 4319 PetscFunctionBegin; 4320 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4321 if (size == 1) { 4322 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4323 if (scall == MAT_INITIAL_MATRIX) { 4324 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4325 } else { 4326 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4327 } 4328 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4329 PetscFunctionReturn(0); 4330 } 4331 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4332 if (scall == MAT_INITIAL_MATRIX) { 4333 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4334 } 4335 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4336 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4337 PetscFunctionReturn(0); 4338 } 4339 4340 #undef __FUNCT__ 4341 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4342 /*@ 4343 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4344 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4345 with MatGetSize() 4346 4347 Not Collective 4348 4349 Input Parameters: 4350 + A - the matrix 4351 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4352 4353 Output Parameter: 4354 .
A_loc - the local sequential matrix generated 4355 4356 Level: developer 4357 4358 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4359 4360 @*/ 4361 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4362 { 4363 PetscErrorCode ierr; 4364 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4365 Mat_SeqAIJ *mat,*a,*b; 4366 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4367 MatScalar *aa,*ba,*cam; 4368 PetscScalar *ca; 4369 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4370 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4371 PetscBool match; 4372 MPI_Comm comm; 4373 PetscMPIInt size; 4374 4375 PetscFunctionBegin; 4376 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4377 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4378 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4379 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4380 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4381 4382 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4383 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4384 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4385 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4386 aa = a->a; ba = b->a; 4387 if (scall == MAT_INITIAL_MATRIX) { 4388 if (size == 1) { 4389 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4390 PetscFunctionReturn(0); 4391 } 4392 4393 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4394 ci[0] = 0; 4395 for (i=0; i<am; i++) { 4396 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4397 } 4398 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4399 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4400 k = 0; 4401 for (i=0; i<am; i++) { 4402 ncols_o = bi[i+1] - bi[i]; 4403 ncols_d = ai[i+1] - ai[i]; 4404 /* off-diagonal portion of A */ 4405 for (jo=0; jo<ncols_o; jo++) { 4406 col = cmap[*bj]; 4407 if (col >= cstart) break; 4408 cj[k] = col; bj++; 4409 ca[k++] = *ba++; 4410 } 4411 /* diagonal portion of A */ 4412 for (j=0; j<ncols_d; j++) { 4413 cj[k] = cstart + *aj++; 4414 ca[k++] = *aa++; 4415 } 4416 /* off-diagonal portion of A */ 4417 for (j=jo; j<ncols_o; j++) { 4418 cj[k] = cmap[*bj++]; 4419 ca[k++] = *ba++; 4420 } 4421 } 4422 /* put together the new matrix */ 4423 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4424 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4425 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4426 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4427 mat->free_a = PETSC_TRUE; 4428 mat->free_ij = PETSC_TRUE; 4429 mat->nonew = 0; 4430 } else if (scall == MAT_REUSE_MATRIX) { 4431 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4432 ci = mat->i; cj = mat->j; cam = mat->a; 4433 for (i=0; i<am; i++) { 4434 /* off-diagonal portion of A */ 4435 ncols_o = bi[i+1] - bi[i]; 4436 for (jo=0; jo<ncols_o; jo++) { 4437 col = cmap[*bj]; 4438 if (col >= cstart) break; 4439 *cam++ = *ba++; bj++; 4440 } 4441 /* diagonal portion of A */ 4442 ncols_d = ai[i+1] - ai[i]; 4443 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4444 /* off-diagonal portion of A */ 4445 for (j=jo; j<ncols_o; j++) { 4446 *cam++ = *ba++; bj++; 4447 } 4448 } 4449 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4450 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4451 PetscFunctionReturn(0); 4452 } 4453 4454 #undef __FUNCT__ 4455 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4456 /*@C 4457 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4458 4459 Not Collective 4460 4461 Input Parameters: 4462 + A - the matrix 4463 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4464 - row, col - index sets of rows and columns to extract (or NULL) 4465 4466 Output Parameter: 4467 . A_loc - the local sequential matrix generated 4468 4469 Level: developer 4470 4471 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4472 4473 @*/ 4474 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4475 { 4476 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4477 PetscErrorCode ierr; 4478 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4479 IS isrowa,iscola; 4480 Mat *aloc; 4481 PetscBool match; 4482 4483 PetscFunctionBegin; 4484 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4485 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4486 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4487 if (!row) { 4488 start = A->rmap->rstart; end = A->rmap->rend; 4489 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4490 } else { 4491 isrowa = *row; 4492 } 4493 if (!col) { 4494 start = A->cmap->rstart; 4495 cmap = a->garray; 4496 nzA = a->A->cmap->n; 4497 nzB = a->B->cmap->n; 4498 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4499 ncols = 0; 4500 for (i=0; i<nzB; i++) { 4501 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4502 else break; 4503 } 4504 imark = i; 4505 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4506 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4507 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4508 } else { 4509 iscola = *col; 4510 } 4511 if (scall != MAT_INITIAL_MATRIX) { 4512 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4513 aloc[0] = *A_loc; 4514 } 4515 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4516 *A_loc = aloc[0]; 4517 ierr = PetscFree(aloc);CHKERRQ(ierr); 4518 if (!row) { 4519 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4520 } 4521 if (!col) { 4522 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4523 } 4524 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4525 PetscFunctionReturn(0); 4526 } 4527 4528 #undef __FUNCT__ 4529 #define __FUNCT__ "MatGetBrowsOfAcols" 4530 /*@C 4531 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to 
nonzero columns of local A 4532 4533 Collective on Mat 4534 4535 Input Parameters: 4536 + A,B - the matrices in mpiaij format 4537 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4538 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4539 4540 Output Parameter: 4541 + rowb, colb - index sets of rows and columns of B to extract 4542 - B_seq - the sequential matrix generated 4543 4544 Level: developer 4545 4546 @*/ 4547 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4548 { 4549 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4550 PetscErrorCode ierr; 4551 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4552 IS isrowb,iscolb; 4553 Mat *bseq=NULL; 4554 4555 PetscFunctionBegin; 4556 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4557 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4558 } 4559 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4560 4561 if (scall == MAT_INITIAL_MATRIX) { 4562 start = A->cmap->rstart; 4563 cmap = a->garray; 4564 nzA = a->A->cmap->n; 4565 nzB = a->B->cmap->n; 4566 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4567 ncols = 0; 4568 for (i=0; i<nzB; i++) { /* row < local row index */ 4569 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4570 else break; 4571 } 4572 imark = i; 4573 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4574 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4575 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4576 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4577 } else { 4578 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4579 isrowb = *rowb; iscolb = *colb; 4580 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 4581 bseq[0] = *B_seq; 4582 } 4583 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4584 *B_seq = bseq[0]; 4585 ierr = PetscFree(bseq);CHKERRQ(ierr); 4586 if (!rowb) { 4587 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4588 } else { 4589 *rowb = isrowb; 4590 } 4591 if (!colb) { 4592 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4593 } else { 4594 *colb = iscolb; 4595 } 4596 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4597 PetscFunctionReturn(0); 4598 } 4599 4600 #undef __FUNCT__ 4601 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4602 /* 4603 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4604 of the OFF-DIAGONAL portion of local A 4605 4606 Collective on Mat 4607 4608 Input Parameters: 4609 + A,B - the matrices in mpiaij format 4610 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4611 4612 Output Parameter: 4613 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4614 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4615 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4616 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4617 4618 Level: developer 4619 4620 */ 4621 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4622 { 4623 VecScatter_MPI_General *gen_to,*gen_from; 4624 PetscErrorCode ierr; 4625 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4626 Mat_SeqAIJ *b_oth; 4627 VecScatter ctx =a->Mvctx; 4628 MPI_Comm comm; 4629 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4630 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4631 PetscScalar *rvalues,*svalues; 4632 MatScalar *b_otha,*bufa,*bufA; 4633 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4634 MPI_Request *rwaits = NULL,*swaits = NULL; 4635 MPI_Status *sstatus,rstatus; 4636 PetscMPIInt jj,size; 4637 PetscInt *cols,sbs,rbs; 4638 PetscScalar *vals; 4639 4640 PetscFunctionBegin; 4641 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4642 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4643 4644 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4645 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4646 } 4647 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4648 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4649 4650 gen_to = (VecScatter_MPI_General*)ctx->todata; 4651 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4652 rvalues = gen_from->values; /* holds the length of receiving row */ 4653 svalues = gen_to->values; /* holds the length of sending row */ 4654 nrecvs = gen_from->n; 4655 nsends = gen_to->n; 4656 4657 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4658 srow = gen_to->indices; /* local row index to be sent */ 4659 sstarts = gen_to->starts; 4660 sprocs = gen_to->procs; 4661 sstatus = gen_to->sstatus; 4662 sbs = gen_to->bs; 4663 rstarts = gen_from->starts; 4664 rprocs = gen_from->procs; 4665 rbs = gen_from->bs; 4666 4667 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4668 if (scall == MAT_INITIAL_MATRIX) { 4669 /* i-array */ 4670 /*---------*/ 4671 /* post receives */ 4672 for (i=0; i<nrecvs; i++) { 4673 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4674 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4675 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4676 } 4677 4678 /* pack the outgoing message */ 4679 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4680 4681 sstartsj[0] = 0; 4682 rstartsj[0] = 0; 4683 len = 0; /* total length of j or a array to be sent */ 4684 k = 0; 4685 for (i=0; i<nsends; i++) { 4686 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4687 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4688 for (j=0; j<nrows; j++) { 4689 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4690 for (l=0; l<sbs; l++) { 4691 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4692 4693 rowlen[j*sbs+l] = ncols; 4694 4695 len += ncols; 4696 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4697 } 4698 k++; 4699 } 4700 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4701 4702 sstartsj[i+1] = len; /* starting point of 
(i+1)-th outgoing msg in bufj and bufa */ 4703 } 4704 /* recvs and sends of i-array are completed */ 4705 i = nrecvs; 4706 while (i--) { 4707 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4708 } 4709 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4710 4711 /* allocate buffers for sending j and a arrays */ 4712 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 4713 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 4714 4715 /* create i-array of B_oth */ 4716 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 4717 4718 b_othi[0] = 0; 4719 len = 0; /* total length of j or a array to be received */ 4720 k = 0; 4721 for (i=0; i<nrecvs; i++) { 4722 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4723 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4724 for (j=0; j<nrows; j++) { 4725 b_othi[k+1] = b_othi[k] + rowlen[j]; 4726 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 4727 k++; 4728 } 4729 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4730 } 4731 4732 /* allocate space for j and a arrrays of B_oth */ 4733 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 4734 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 4735 4736 /* j-array */ 4737 /*---------*/ 4738 /* post receives of j-array */ 4739 for (i=0; i<nrecvs; i++) { 4740 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4741 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4742 } 4743 4744 /* pack the outgoing message j-array */ 4745 k = 0; 4746 for (i=0; i<nsends; i++) { 4747 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4748 bufJ = bufj+sstartsj[i]; 4749 for (j=0; j<nrows; j++) { 4750 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4751 for (ll=0; ll<sbs; ll++) { 4752 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4753 for (l=0; l<ncols; l++) { 4754 *bufJ++ = cols[l]; 4755 } 4756 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4757 } 4758 } 4759 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4760 } 4761 4762 /* recvs and sends of j-array are completed */ 4763 i = nrecvs; 4764 while (i--) { 4765 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4766 } 4767 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4768 } else if (scall == MAT_REUSE_MATRIX) { 4769 sstartsj = *startsj_s; 4770 rstartsj = *startsj_r; 4771 bufa = *bufa_ptr; 4772 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4773 b_otha = b_oth->a; 4774 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4775 4776 /* a-array */ 4777 /*---------*/ 4778 /* post receives of a-array */ 4779 for (i=0; i<nrecvs; i++) { 4780 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4781 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4782 } 4783 4784 /* pack the outgoing message a-array */ 4785 k = 0; 4786 for (i=0; i<nsends; i++) { 4787 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4788 bufA = bufa+sstartsj[i]; 4789 for (j=0; j<nrows; j++) { 4790 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4791 for (ll=0; ll<sbs; ll++) { 4792 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4793 for (l=0; l<ncols; l++) { 4794 *bufA++ = vals[l]; 4795 } 4796 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4797 } 4798 } 4799 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4800 } 4801 /* recvs and sends of a-array are completed */ 4802 i = nrecvs; 4803 while (i--) { 4804 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4805 } 4806 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4807 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4808 4809 if (scall == MAT_INITIAL_MATRIX) { 4810 /* put together the new matrix */ 4811 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4812 4813 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4814 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4815 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4816 b_oth->free_a = PETSC_TRUE; 4817 b_oth->free_ij = PETSC_TRUE; 4818 b_oth->nonew = 0; 4819 4820 ierr = PetscFree(bufj);CHKERRQ(ierr); 4821 if (!startsj_s || !bufa_ptr) { 4822 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4823 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4824 } else { 4825 *startsj_s = sstartsj; 4826 *startsj_r = rstartsj; 4827 *bufa_ptr = bufa; 4828 } 4829 } 4830 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4831 PetscFunctionReturn(0); 4832 } 4833 4834 #undef __FUNCT__ 4835 #define __FUNCT__ "MatGetCommunicationStructs" 4836 /*@C 4837 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4838 4839 Not Collective 4840 4841 Input Parameters: 4842 . A - The matrix in mpiaij format 4843 4844 Output Parameter: 4845 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4846 . 
colmap - A map from global column index to local index into lvec 4847 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4848 4849 Level: developer 4850 4851 @*/ 4852 #if defined(PETSC_USE_CTABLE) 4853 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4854 #else 4855 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4856 #endif 4857 { 4858 Mat_MPIAIJ *a; 4859 4860 PetscFunctionBegin; 4861 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4862 PetscValidPointer(lvec, 2); 4863 PetscValidPointer(colmap, 3); 4864 PetscValidPointer(multScatter, 4); 4865 a = (Mat_MPIAIJ*) A->data; 4866 if (lvec) *lvec = a->lvec; 4867 if (colmap) *colmap = a->colmap; 4868 if (multScatter) *multScatter = a->Mvctx; 4869 PetscFunctionReturn(0); 4870 } 4871 4872 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4873 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4874 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4875 #if defined(PETSC_HAVE_ELEMENTAL) 4876 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4877 #endif 4878 #if defined(PETSC_HAVE_HYPRE) 4879 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 4880 #endif 4881 4882 #undef __FUNCT__ 4883 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4884 /* 4885 Computes (B'*A')' since computing B*A directly is untenable 4886 4887 n p p 4888 ( ) ( ) ( ) 4889 m ( A ) * n ( B ) = m ( C ) 4890 ( ) ( ) ( ) 4891 4892 */ 4893 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4894 { 4895 PetscErrorCode ierr; 4896 Mat At,Bt,Ct; 4897 4898 PetscFunctionBegin; 4899 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4900 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4901 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4902 ierr = MatDestroy(&At);CHKERRQ(ierr); 4903 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4904 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4905 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4906 PetscFunctionReturn(0); 4907 } 4908 4909 #undef __FUNCT__ 4910 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4911 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4912 { 4913 PetscErrorCode ierr; 4914 PetscInt m=A->rmap->n,n=B->cmap->n; 4915 Mat Cmat; 4916 4917 PetscFunctionBegin; 4918 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4919 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 4920 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4921 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 4922 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4923 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 4924 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4925 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4926 4927 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 4928 4929 *C = Cmat; 4930 PetscFunctionReturn(0); 4931 } 4932 4933 /* ----------------------------------------------------------------*/ 4934 #undef __FUNCT__ 4935 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4936 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat 
*C) 4937 { 4938 PetscErrorCode ierr; 4939 4940 PetscFunctionBegin; 4941 if (scall == MAT_INITIAL_MATRIX) { 4942 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4943 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4944 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4945 } 4946 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4947 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4948 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4949 PetscFunctionReturn(0); 4950 } 4951 4952 /*MC 4953 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4954 4955 Options Database Keys: 4956 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4957 4958 Level: beginner 4959 4960 .seealso: MatCreateAIJ() 4961 M*/ 4962 4963 #undef __FUNCT__ 4964 #define __FUNCT__ "MatCreate_MPIAIJ" 4965 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 4966 { 4967 Mat_MPIAIJ *b; 4968 PetscErrorCode ierr; 4969 PetscMPIInt size; 4970 4971 PetscFunctionBegin; 4972 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 4973 4974 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 4975 B->data = (void*)b; 4976 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4977 B->assembled = PETSC_FALSE; 4978 B->insertmode = NOT_SET_VALUES; 4979 b->size = size; 4980 4981 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 4982 4983 /* build cache for off array entries formed */ 4984 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 4985 4986 b->donotstash = PETSC_FALSE; 4987 b->colmap = 0; 4988 b->garray = 0; 4989 b->roworiented = PETSC_TRUE; 4990 4991 /* stuff used for matrix vector multiply */ 4992 b->lvec = NULL; 4993 b->Mvctx = NULL; 4994 4995 /* stuff for MatGetRow() */ 4996 b->rowindices = 0; 4997 b->rowvalues = 0; 4998 b->getrowactive = PETSC_FALSE; 4999 5000 /* flexible pointer used in CUSP/CUSPARSE classes */ 5001 b->spptr = NULL; 5002 5003 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5004 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5005 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5006 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5007 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5008 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5009 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5010 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5011 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5012 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5013 #if defined(PETSC_HAVE_ELEMENTAL) 5014 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5015 #endif 5016 #if defined(PETSC_HAVE_HYPRE) 5017 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5018 #endif 5019 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5020 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5021 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5022 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5023 PetscFunctionReturn(0); 5024 } 5025 5026 #undef __FUNCT__ 5027 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5028 /*@C 5029 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5030 and "off-diagonal" part of the matrix in CSR format. 5031 5032 Collective on MPI_Comm 5033 5034 Input Parameters: 5035 + comm - MPI communicator 5036 . m - number of local rows (Cannot be PETSC_DECIDE) 5037 . n - This value should be the same as the local size used in creating the 5038 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5039 calculated if N is given) For square matrices n is almost always m. 5040 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5041 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5042 . i - row indices for "diagonal" portion of matrix 5043 . j - column indices 5044 . a - matrix values 5045 . oi - row indices for "off-diagonal" portion of matrix 5046 . oj - column indices 5047 - oa - matrix values 5048 5049 Output Parameter: 5050 . mat - the matrix 5051 5052 Level: advanced 5053 5054 Notes: 5055 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5056 must free the arrays once the matrix has been destroyed and not before. 5057 5058 The i and j indices are 0 based 5059 5060 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5061 5062 This sets local rows and cannot be used to set off-processor values. 5063 5064 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5065 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5066 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5067 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5068 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5069 communication if it is known that only local entries will be set. 
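   Example usage, a minimal sketch with illustrative sizes and values in which each process contributes one local row whose single entry lies in its "diagonal" block; oj and oa are dummy storage (unused because oi records zero "off-diagonal" entries), and all six arrays must remain valid until the matrix has been destroyed:
.vb
      PetscInt    i[]  = {0,1};
      PetscInt    j[]  = {0};
      PetscScalar a[]  = {2.0};
      PetscInt    oi[] = {0,0};
      PetscInt    oj[] = {0};
      PetscScalar oa[] = {0.0};
      Mat         A;

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve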
5070 5071 .keywords: matrix, aij, compressed row, sparse, parallel 5072 5073 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5074 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5075 @*/ 5076 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5077 { 5078 PetscErrorCode ierr; 5079 Mat_MPIAIJ *maij; 5080 5081 PetscFunctionBegin; 5082 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5083 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5084 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5085 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5086 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5087 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5088 maij = (Mat_MPIAIJ*) (*mat)->data; 5089 5090 (*mat)->preallocated = PETSC_TRUE; 5091 5092 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5093 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5094 5095 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5096 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5097 5098 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5099 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5100 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5101 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5102 5103 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5104 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5105 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5106 PetscFunctionReturn(0); 5107 } 5108 5109 /* 5110 Special version for direct calls from Fortran 5111 */ 5112 #include <petsc/private/fortranimpl.h> 5113 5114 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5115 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5116 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5117 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5118 #endif 5119 5120 /* Change these macros so can be used in void function */ 5121 #undef CHKERRQ 5122 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5123 #undef SETERRQ2 5124 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5125 #undef SETERRQ3 5126 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5127 #undef SETERRQ 5128 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5129 5130 #undef __FUNCT__ 5131 #define __FUNCT__ "matsetvaluesmpiaij_" 5132 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5133 { 5134 Mat mat = *mmat; 5135 PetscInt m = *mm, n = *mn; 5136 InsertMode addv = *maddv; 5137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5138 PetscScalar value; 5139 PetscErrorCode ierr; 5140 5141 MatCheckPreallocated(mat,1); 5142 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5143 5144 #if defined(PETSC_USE_DEBUG) 5145 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5146 #endif 5147 { 5148 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5149 
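    /* Ownership ranges: rows in [rstart,rend) and columns in [cstart,cend) belong to this process. An entry in a locally
       owned row goes into the diagonal block aij->A when its column lies in [cstart,cend) and into the off-diagonal block
       aij->B otherwise; entries for rows owned by other processes are stashed (unless donotstash is set) and communicated
       during assembly. */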
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5150 PetscBool roworiented = aij->roworiented; 5151 5152 /* Some Variables required in the macro */ 5153 Mat A = aij->A; 5154 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5155 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5156 MatScalar *aa = a->a; 5157 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5158 Mat B = aij->B; 5159 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5160 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5161 MatScalar *ba = b->a; 5162 5163 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5164 PetscInt nonew = a->nonew; 5165 MatScalar *ap1,*ap2; 5166 5167 PetscFunctionBegin; 5168 for (i=0; i<m; i++) { 5169 if (im[i] < 0) continue; 5170 #if defined(PETSC_USE_DEBUG) 5171 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5172 #endif 5173 if (im[i] >= rstart && im[i] < rend) { 5174 row = im[i] - rstart; 5175 lastcol1 = -1; 5176 rp1 = aj + ai[row]; 5177 ap1 = aa + ai[row]; 5178 rmax1 = aimax[row]; 5179 nrow1 = ailen[row]; 5180 low1 = 0; 5181 high1 = nrow1; 5182 lastcol2 = -1; 5183 rp2 = bj + bi[row]; 5184 ap2 = ba + bi[row]; 5185 rmax2 = bimax[row]; 5186 nrow2 = bilen[row]; 5187 low2 = 0; 5188 high2 = nrow2; 5189 5190 for (j=0; j<n; j++) { 5191 if (roworiented) value = v[i*n+j]; 5192 else value = v[i+j*m]; 5193 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5194 if (in[j] >= cstart && in[j] < cend) { 5195 col = in[j] - cstart; 5196 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5197 } else if (in[j] < 0) continue; 5198 #if defined(PETSC_USE_DEBUG) 5199 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5200 #endif 5201 else { 5202 if (mat->was_assembled) { 5203 if (!aij->colmap) { 5204 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5205 } 5206 #if defined(PETSC_USE_CTABLE) 5207 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5208 col--; 5209 #else 5210 col = aij->colmap[in[j]] - 1; 5211 #endif 5212 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5213 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5214 col = in[j]; 5215 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5216 B = aij->B; 5217 b = (Mat_SeqAIJ*)B->data; 5218 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5219 rp2 = bj + bi[row]; 5220 ap2 = ba + bi[row]; 5221 rmax2 = bimax[row]; 5222 nrow2 = bilen[row]; 5223 low2 = 0; 5224 high2 = nrow2; 5225 bm = aij->B->rmap->n; 5226 ba = b->a; 5227 } 5228 } else col = in[j]; 5229 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5230 } 5231 } 5232 } else if (!aij->donotstash) { 5233 if (roworiented) { 5234 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5235 } else { 5236 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5237 } 5238 } 5239 } 5240 } 5241 PetscFunctionReturnVoid(); 5242 } 5243 5244
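/*
   Usage sketch (illustrative only; the sizes and values are arbitrary): matsetvaluesmpiaij_() above is the
   direct-call Fortran entry point for MatSetValues() on a MATMPIAIJ matrix, so the corresponding standard
   call sequence, written here in C, is

      Mat         A;
      PetscInt    row = 0, col = 0;
      PetscScalar v   = 1.0;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,10,10);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,1,NULL,1,NULL);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/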