1
2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
3 #include <petsc/private/vecimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7
8 /*MC
9    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
10
11    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
12    and MATMPIAIJ otherwise. As a result, for single process communicators,
13    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
14    for communicators controlling multiple processes. It is recommended that you call both of
15    the above preallocation routines for simplicity.
16
17    Options Database Keys:
18 .  -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
19
20    Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
21    enough of them exist.
22
23    Level: beginner
24
25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
26 M*/
27
28 /*MC
29    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
30
31    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
32    and MATMPIAIJCRL otherwise. As a result, for single process communicators,
33    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
34    for communicators controlling multiple processes. It is recommended that you call both of
35    the above preallocation routines for simplicity.
36
37    Options Database Keys:
38 .  -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
39
40    Level: beginner
41
42 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
43 M*/
44
45 #undef __FUNCT__
46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
48 {
49   PetscErrorCode  ierr;
50   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
51   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
52   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
53   const PetscInt  *ia,*ib;
54   const MatScalar *aa,*bb;
55   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
56   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
57
58   PetscFunctionBegin;
59   *keptrows = 0;
60   ia        = a->i;
61   ib        = b->i;
62   for (i=0; i<m; i++) {
63     na = ia[i+1] - ia[i];
64     nb = ib[i+1] - ib[i];
65     if (!na && !nb) {
66       cnt++;
67       goto ok1;
68     }
69     aa = a->a + ia[i];
70     for (j=0; j<na; j++) {
71       if (aa[j] != 0.0) goto ok1;
72     }
73     bb = b->a + ib[i];
74     for (j=0; j<nb; j++) {
75       if (bb[j] != 0.0) goto ok1;
76     }
77     cnt++;
78 ok1:;
79   }
80   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
81   if (!n0rows) PetscFunctionReturn(0);
82   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
83   cnt  = 0;
84   for (i=0; i<m; i++) {
85     na = ia[i+1] - ia[i];
86     nb = ib[i+1] - ib[i];
87     if (!na && !nb) continue;
88     aa = a->a + ia[i];
89     for (j=0; j<na; j++) {
90       if (aa[j] != 0.0) {
91         rows[cnt++] = rstart + i;
92         goto ok2;
93       }
94     }
95     bb = b->a + ib[i];
96     for (j=0; j<nb; j++) {
97       if (bb[j] != 0.0) {
98         rows[cnt++] = rstart + i;
99         goto ok2;
100       }
101     }
102 ok2:;
103   }
104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107
108 #undef __FUNCT__
109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
110 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
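/*
   Illustrative usage sketch for the MATAIJ man page above (not part of this file's
   implementation; comm, m, n, nnz_d and nnz_o are assumed to be provided by the caller,
   inside a function that declares PetscErrorCode ierr):

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(A,nnz_d,NULL);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,nnz_d,NULL,nnz_o,NULL);CHKERRQ(ierr);

   The preallocation call that does not match the runtime type is simply ignored, which is
   why calling both keeps the same code correct on one process and on many.
*/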
111 { 112 PetscErrorCode ierr; 113 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 114 115 PetscFunctionBegin; 116 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 117 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 118 } else { 119 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 120 } 121 PetscFunctionReturn(0); 122 } 123 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 128 { 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 130 PetscErrorCode ierr; 131 PetscInt i,rstart,nrows,*rows; 132 133 PetscFunctionBegin; 134 *zrows = NULL; 135 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 136 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 137 for (i=0; i<nrows; i++) rows[i] += rstart; 138 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 145 { 146 PetscErrorCode ierr; 147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 148 PetscInt i,n,*garray = aij->garray; 149 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 150 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 151 PetscReal *work; 152 153 PetscFunctionBegin; 154 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 155 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 156 if (type == NORM_2) { 157 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 158 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 159 } 160 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 161 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 162 } 163 } else if (type == NORM_1) { 164 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 165 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 166 } 167 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 168 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 169 } 170 } else if (type == NORM_INFINITY) { 171 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 172 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 173 } 174 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 175 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 176 } 177 178 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 179 if (type == NORM_INFINITY) { 180 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 181 } else { 182 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 183 } 184 ierr = PetscFree(work);CHKERRQ(ierr); 185 if (type == NORM_2) { 186 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 187 } 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ" 193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 194 { 195 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 196 IS sis,gis; 197 PetscErrorCode ierr; 198 const PetscInt *isis,*igis; 199 PetscInt n,*iis,nsis,ngis,rstart,i; 200 201 PetscFunctionBegin; 202 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 203 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 204 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 205 ierr = 
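/* (Descriptive note, added for orientation) throughout this file aij->garray[k] is the global
   column number of local column k of the off-diagonal block B. MatGetColumnNorms_MPIAIJ above
   uses it to scatter each process's contributions into a full-length work array before the
   MPIU_Allreduce, and MatCreateColmap_MPIAIJ_Private later in this file builds the inverse
   (global column to local column of B) mapping. */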
ISGetSize(sis,&nsis);CHKERRQ(ierr); 206 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 207 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 208 209 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 210 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 211 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 212 n = ngis + nsis; 213 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 214 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 215 for (i=0; i<n; i++) iis[i] += rstart; 216 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 217 218 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 219 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 220 ierr = ISDestroy(&sis);CHKERRQ(ierr); 221 ierr = ISDestroy(&gis);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatDistribute_MPIAIJ" 227 /* 228 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 229 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 230 231 Only for square matrices 232 233 Used by a preconditioner, hence PETSC_EXTERN 234 */ 235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 236 { 237 PetscMPIInt rank,size; 238 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 239 PetscErrorCode ierr; 240 Mat mat; 241 Mat_SeqAIJ *gmata; 242 PetscMPIInt tag; 243 MPI_Status status; 244 PetscBool aij; 245 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 246 247 PetscFunctionBegin; 248 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 249 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 250 if (!rank) { 251 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 252 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 253 } 254 if (reuse == MAT_INITIAL_MATRIX) { 255 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 256 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 257 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 258 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 259 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 260 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 261 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 262 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 263 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 264 265 rowners[0] = 0; 266 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 267 rstart = rowners[rank]; 268 rend = rowners[rank+1]; 269 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 270 if (!rank) { 271 gmata = (Mat_SeqAIJ*) gmat->data; 272 /* send row lengths to all processors */ 273 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 274 for (i=1; i<size; i++) { 275 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 276 } 277 /* determine number diagonal and off-diagonal counts */ 278 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 279 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 280 jj = 0; 281 for (i=0; i<m; i++) { 282 for (j=0; j<dlens[i]; j++) { 283 if (gmata->j[jj] < rstart) ld[i]++; 284 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 285 jj++; 286 } 287 } 288 /* send column indices to other processes */ 289 for (i=1; i<size; i++) { 290 nz = 
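/* (Descriptive note) the initial distribution in this branch is three ordered point-to-point
   messages per remote process: the row lengths (sent above), then a count nz followed by the
   column indices (this loop), and finally the numerical values (the loop below); the receiving
   side posts matching blocking MPI_Recv calls in the same order. */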
gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 291 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 292 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 } 294 295 /* send numerical values to other processes */ 296 for (i=1; i<size; i++) { 297 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 298 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 299 } 300 gmataa = gmata->a; 301 gmataj = gmata->j; 302 303 } else { 304 /* receive row lengths */ 305 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 306 /* receive column indices */ 307 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 308 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 309 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* determine number diagonal and off-diagonal counts */ 311 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 312 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 313 jj = 0; 314 for (i=0; i<m; i++) { 315 for (j=0; j<dlens[i]; j++) { 316 if (gmataj[jj] < rstart) ld[i]++; 317 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 318 jj++; 319 } 320 } 321 /* receive numerical values */ 322 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* set preallocation */ 326 for (i=0; i<m; i++) { 327 dlens[i] -= olens[i]; 328 } 329 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 330 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 331 332 for (i=0; i<m; i++) { 333 dlens[i] += olens[i]; 334 } 335 cnt = 0; 336 for (i=0; i<m; i++) { 337 row = rstart + i; 338 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 339 cnt += dlens[i]; 340 } 341 if (rank) { 342 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 343 } 344 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 345 ierr = PetscFree(rowners);CHKERRQ(ierr); 346 347 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 348 349 *inmat = mat; 350 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 351 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 352 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 353 mat = *inmat; 354 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 355 if (!rank) { 356 /* send numerical values to other processes */ 357 gmata = (Mat_SeqAIJ*) gmat->data; 358 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 359 gmataa = gmata->a; 360 for (i=1; i<size; i++) { 361 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 362 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 363 } 364 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 365 } else { 366 /* receive numerical values from process 0*/ 367 nz = Ad->nz + Ao->nz; 368 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 369 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 370 } 371 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 372 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 373 ad = Ad->a; 374 ao = Ao->a; 375 if (mat->rmap->n) { 376 i = 0; 377 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 378 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; 
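/* (Descriptive note on the copies around this point) each local row of the original matrix stores
   its entries in global column order, i.e. [ columns < rstart | diagonal-block columns | columns >= rend ],
   and ld[i] counts the leading entries with column < rstart. A row is therefore unpacked as:
   ld[i] leading values into the off-diagonal block Ao, the diagonal-block values into Ad, and the
   trailing values into Ao (picked up together with the next row's leading entries in the loop below,
   with a final fix-up after the loop for the last row). */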
gmataa += nz; 379 } 380 for (i=1; i<mat->rmap->n; i++) { 381 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 i--; 385 if (mat->rmap->n) { 386 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 387 } 388 if (rank) { 389 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 390 } 391 } 392 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 393 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 /* 398 Local utility routine that creates a mapping from the global column 399 number to the local number in the off-diagonal part of the local 400 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 401 a slightly higher hash table cost; without it it is not scalable (each processor 402 has an order N integer array but is fast to acess. 403 */ 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 407 { 408 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 409 PetscErrorCode ierr; 410 PetscInt n = aij->B->cmap->n,i; 411 412 PetscFunctionBegin; 413 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 414 #if defined(PETSC_USE_CTABLE) 415 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 416 for (i=0; i<n; i++) { 417 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 418 } 419 #else 420 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 421 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 422 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 423 #endif 424 PetscFunctionReturn(0); 425 } 426 427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 428 { \ 429 if (col <= lastcol1) low1 = 0; \ 430 else high1 = nrow1; \ 431 lastcol1 = col;\ 432 while (high1-low1 > 5) { \ 433 t = (low1+high1)/2; \ 434 if (rp1[t] > col) high1 = t; \ 435 else low1 = t; \ 436 } \ 437 for (_i=low1; _i<high1; _i++) { \ 438 if (rp1[_i] > col) break; \ 439 if (rp1[_i] == col) { \ 440 if (addv == ADD_VALUES) ap1[_i] += value; \ 441 else ap1[_i] = value; \ 442 goto a_noinsert; \ 443 } \ 444 } \ 445 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 446 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 447 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 448 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 449 N = nrow1++ - 1; a->nz++; high1++; \ 450 /* shift up all the later entries in this row */ \ 451 for (ii=N; ii>=_i; ii--) { \ 452 rp1[ii+1] = rp1[ii]; \ 453 ap1[ii+1] = ap1[ii]; \ 454 } \ 455 rp1[_i] = col; \ 456 ap1[_i] = value; \ 457 A->nonzerostate++;\ 458 a_noinsert: ; \ 459 ailen[row] = nrow1; \ 460 } 461 462 463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 464 { \ 465 if (col <= lastcol2) low2 = 0; \ 466 else high2 = nrow2; \ 467 lastcol2 = col; \ 468 while (high2-low2 > 5) { \ 469 t = (low2+high2)/2; \ 470 if (rp2[t] > col) high2 = t; \ 471 else low2 = t; \ 472 } \ 473 for (_i=low2; _i<high2; _i++) { \ 
474 if (rp2[_i] > col) break; \ 475 if (rp2[_i] == col) { \ 476 if (addv == ADD_VALUES) ap2[_i] += value; \ 477 else ap2[_i] = value; \ 478 goto b_noinsert; \ 479 } \ 480 } \ 481 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 482 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 484 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 485 N = nrow2++ - 1; b->nz++; high2++; \ 486 /* shift up all the later entries in this row */ \ 487 for (ii=N; ii>=_i; ii--) { \ 488 rp2[ii+1] = rp2[ii]; \ 489 ap2[ii+1] = ap2[ii]; \ 490 } \ 491 rp2[_i] = col; \ 492 ap2[_i] = value; \ 493 B->nonzerostate++; \ 494 b_noinsert: ; \ 495 bilen[row] = nrow2; \ 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 501 { 502 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 503 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 504 PetscErrorCode ierr; 505 PetscInt l,*garray = mat->garray,diag; 506 507 PetscFunctionBegin; 508 /* code only works for square matrices A */ 509 510 /* find size of row to the left of the diagonal part */ 511 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 512 row = row - diag; 513 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 514 if (garray[b->j[b->i[row]+l]] > diag) break; 515 } 516 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 517 518 /* diagonal part */ 519 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 520 521 /* right of diagonal part */ 522 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatSetValues_MPIAIJ" 528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 529 { 530 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 531 PetscScalar value; 532 PetscErrorCode ierr; 533 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 540 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 541 MatScalar *aa = a->a; 542 PetscBool ignorezeroentries = a->ignorezeroentries; 543 Mat B = aij->B; 544 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 545 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 546 MatScalar *ba = b->a; 547 548 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 549 PetscInt nonew; 550 MatScalar *ap1,*ap2; 551 552 PetscFunctionBegin; 553 for (i=0; i<m; i++) { 554 if (im[i] < 0) continue; 555 #if defined(PETSC_USE_DEBUG) 556 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 557 #endif 558 if (im[i] >= rstart && im[i] < rend) { 559 row = im[i] - rstart; 560 lastcol1 = -1; 561 rp1 = aj + ai[row]; 562 ap1 = aa + ai[row]; 563 rmax1 = aimax[row]; 564 nrow1 = ailen[row]; 565 low1 = 0; 566 
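/* (Descriptive note) rp1/ap1 above and rp2/ap2 just below point at the column indices and values of
   the current local row in the diagonal block A and in the off-diagonal block B respectively; the
   low1/high1 and low2/high2 pairs bracket the search window used by the MatSetValues_SeqAIJ_A_Private
   and MatSetValues_SeqAIJ_B_Private macros when locating each incoming column within the row. */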
high1 = nrow1; 567 lastcol2 = -1; 568 rp2 = bj + bi[row]; 569 ap2 = ba + bi[row]; 570 rmax2 = bimax[row]; 571 nrow2 = bilen[row]; 572 low2 = 0; 573 high2 = nrow2; 574 575 for (j=0; j<n; j++) { 576 if (roworiented) value = v[i*n+j]; 577 else value = v[i+j*m]; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 579 if (in[j] >= cstart && in[j] < cend) { 580 col = in[j] - cstart; 581 nonew = a->nonew; 582 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 583 } else if (in[j] < 0) continue; 584 #if defined(PETSC_USE_DEBUG) 585 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 586 #endif 587 else { 588 if (mat->was_assembled) { 589 if (!aij->colmap) { 590 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 591 } 592 #if defined(PETSC_USE_CTABLE) 593 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 594 col--; 595 #else 596 col = aij->colmap[in[j]] - 1; 597 #endif 598 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 599 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 600 col = in[j]; 601 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 602 B = aij->B; 603 b = (Mat_SeqAIJ*)B->data; 604 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 605 rp2 = bj + bi[row]; 606 ap2 = ba + bi[row]; 607 rmax2 = bimax[row]; 608 nrow2 = bilen[row]; 609 low2 = 0; 610 high2 = nrow2; 611 bm = aij->B->rmap->n; 612 ba = b->a; 613 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } else col = in[j]; 615 nonew = b->nonew; 616 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 617 } 618 } 619 } else { 620 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 621 if (!aij->donotstash) { 622 mat->assembled = PETSC_FALSE; 623 if (roworiented) { 624 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 625 } else { 626 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 627 } 628 } 629 } 630 } 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatGetValues_MPIAIJ" 636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 641 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 642 643 PetscFunctionBegin; 644 for (i=0; i<m; i++) { 645 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 646 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 647 if (idxm[i] >= rstart && idxm[i] < rend) { 648 row = idxm[i] - rstart; 649 for (j=0; j<n; j++) { 650 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 651 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 652 if (idxn[j] >= cstart && idxn[j] < cend) 
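/* A requested column in [cstart,cend) belongs to the diagonal block A and is addressed directly by
   its local offset; any other column must be translated through aij->colmap (global column number to
   local column of B, stored shifted by one so that 0 can mean "not present"), which is built lazily
   by MatCreateColmap_MPIAIJ_Private() the first time it is needed. */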
{ 653 col = idxn[j] - cstart; 654 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 655 } else { 656 if (!aij->colmap) { 657 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 658 } 659 #if defined(PETSC_USE_CTABLE) 660 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 661 col--; 662 #else 663 col = aij->colmap[idxn[j]] - 1; 664 #endif 665 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 666 else { 667 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 668 } 669 } 670 } 671 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 672 } 673 PetscFunctionReturn(0); 674 } 675 676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 681 { 682 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 683 PetscErrorCode ierr; 684 PetscInt nstash,reallocs; 685 686 PetscFunctionBegin; 687 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 688 689 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 690 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 691 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 698 { 699 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 700 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 701 PetscErrorCode ierr; 702 PetscMPIInt n; 703 PetscInt i,j,rstart,ncols,flg; 704 PetscInt *row,*col; 705 PetscBool other_disassembled; 706 PetscScalar *val; 707 708 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 709 710 PetscFunctionBegin; 711 if (!aij->donotstash && !mat->nooffprocentries) { 712 while (1) { 713 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 714 if (!flg) break; 715 716 for (i=0; i<n; ) { 717 /* Now identify the consecutive vals belonging to the same row */ 718 for (j=i,rstart=row[j]; j<n; j++) { 719 if (row[j] != rstart) break; 720 } 721 if (j < n) ncols = j-i; 722 else ncols = n-i; 723 /* Now assemble all these values with a single function call */ 724 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 725 726 i = j; 727 } 728 } 729 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 730 } 731 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 733 734 /* determine if any processor has disassembled, if so we must 735 also disassemble ourselfs, in order that we may reassemble. 
*/ 736 /* 737 if nonzero structure of submatrix B cannot change then we know that 738 no processor disassembled thus we can skip this stuff 739 */ 740 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 741 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 742 if (mat->was_assembled && !other_disassembled) { 743 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 744 } 745 } 746 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 747 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 748 } 749 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 750 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 752 753 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 754 755 aij->rowvalues = 0; 756 757 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 758 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 759 760 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 761 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 762 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 763 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 764 } 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 771 { 772 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 773 PetscErrorCode ierr; 774 775 PetscFunctionBegin; 776 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 777 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 778 PetscFunctionReturn(0); 779 } 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatZeroRows_MPIAIJ" 783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 784 { 785 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 786 PetscInt *lrows; 787 PetscInt r, len; 788 PetscErrorCode ierr; 789 790 PetscFunctionBegin; 791 /* get locally owned rows */ 792 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 793 /* fix right hand side if needed */ 794 if (x && b) { 795 const PetscScalar *xx; 796 PetscScalar *bb; 797 798 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 799 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 800 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 801 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 802 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 803 } 804 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 805 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 806 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 807 PetscBool cong; 808 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 809 if (cong) A->congruentlayouts = 1; 810 else A->congruentlayouts = 0; 811 } 812 if ((diag != 0.0) && A->congruentlayouts) { 813 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 814 } else if (diag != 0.0) { 815 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 816 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 817 for (r = 0; r < len; ++r) { 818 const 
PetscInt row = lrows[r] + A->rmap->rstart; 819 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 820 } 821 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 822 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 823 } else { 824 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 825 } 826 ierr = PetscFree(lrows);CHKERRQ(ierr); 827 828 /* only change matrix nonzero state if pattern was allowed to be changed */ 829 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 830 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 831 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 832 } 833 PetscFunctionReturn(0); 834 } 835 836 #undef __FUNCT__ 837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 839 { 840 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 841 PetscErrorCode ierr; 842 PetscMPIInt n = A->rmap->n; 843 PetscInt i,j,r,m,p = 0,len = 0; 844 PetscInt *lrows,*owners = A->rmap->range; 845 PetscSFNode *rrows; 846 PetscSF sf; 847 const PetscScalar *xx; 848 PetscScalar *bb,*mask; 849 Vec xmask,lmask; 850 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 851 const PetscInt *aj, *ii,*ridx; 852 PetscScalar *aa; 853 854 PetscFunctionBegin; 855 /* Create SF where leaves are input rows and roots are owned rows */ 856 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 857 for (r = 0; r < n; ++r) lrows[r] = -1; 858 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 859 for (r = 0; r < N; ++r) { 860 const PetscInt idx = rows[r]; 861 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 862 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 863 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 864 } 865 rrows[r].rank = p; 866 rrows[r].index = rows[r] - owners[p]; 867 } 868 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 869 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 870 /* Collect flags for rows to be zeroed */ 871 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 872 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 873 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 874 /* Compress and put in row numbers */ 875 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 876 /* zero diagonal part of matrix */ 877 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 878 /* handle off diagonal part of matrix */ 879 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 880 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 881 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 882 for (i=0; i<len; i++) bb[lrows[i]] = 1; 883 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 884 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 887 if (x) { 888 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 891 ierr = 
VecGetArray(b,&bb);CHKERRQ(ierr); 892 } 893 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 894 /* remove zeroed rows of off diagonal matrix */ 895 ii = aij->i; 896 for (i=0; i<len; i++) { 897 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 898 } 899 /* loop over all elements of off process part of matrix zeroing removed columns*/ 900 if (aij->compressedrow.use) { 901 m = aij->compressedrow.nrows; 902 ii = aij->compressedrow.i; 903 ridx = aij->compressedrow.rindex; 904 for (i=0; i<m; i++) { 905 n = ii[i+1] - ii[i]; 906 aj = aij->j + ii[i]; 907 aa = aij->a + ii[i]; 908 909 for (j=0; j<n; j++) { 910 if (PetscAbsScalar(mask[*aj])) { 911 if (b) bb[*ridx] -= *aa*xx[*aj]; 912 *aa = 0.0; 913 } 914 aa++; 915 aj++; 916 } 917 ridx++; 918 } 919 } else { /* do not use compressed row format */ 920 m = l->B->rmap->n; 921 for (i=0; i<m; i++) { 922 n = ii[i+1] - ii[i]; 923 aj = aij->j + ii[i]; 924 aa = aij->a + ii[i]; 925 for (j=0; j<n; j++) { 926 if (PetscAbsScalar(mask[*aj])) { 927 if (b) bb[i] -= *aa*xx[*aj]; 928 *aa = 0.0; 929 } 930 aa++; 931 aj++; 932 } 933 } 934 } 935 if (x) { 936 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 937 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 938 } 939 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 940 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 941 ierr = PetscFree(lrows);CHKERRQ(ierr); 942 943 /* only change matrix nonzero state if pattern was allowed to be changed */ 944 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 945 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 946 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 947 } 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatMult_MPIAIJ" 953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 954 { 955 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 956 PetscErrorCode ierr; 957 PetscInt nt; 958 959 PetscFunctionBegin; 960 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 961 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 962 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 964 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 965 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 972 { 973 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 974 PetscErrorCode ierr; 975 976 PetscFunctionBegin; 977 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatMultAdd_MPIAIJ" 983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 984 { 985 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 986 PetscErrorCode ierr; 987 988 PetscFunctionBegin; 989 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 990 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 991 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 998 PetscErrorCode 
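/* (Descriptive note) MatMult_MPIAIJ and MatMultAdd_MPIAIJ above implement the usual split
   y = A*x_local + B*x_ghost: the scatter of the ghost values of x is started, the diagonal-block
   product is computed while the messages are in flight, the scatter is completed, and the
   off-diagonal block is applied with a multadd. The transpose product below reverses the data
   flow: B^T*x is computed into lvec first and then scattered back into y with ADD_VALUES. */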
MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 999 { 1000 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1001 PetscErrorCode ierr; 1002 PetscBool merged; 1003 1004 PetscFunctionBegin; 1005 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1006 /* do nondiagonal part */ 1007 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1008 if (!merged) { 1009 /* send it on its way */ 1010 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1011 /* do local part */ 1012 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1013 /* receive remote parts: note this assumes the values are not actually */ 1014 /* added in yy until the next line, */ 1015 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1016 } else { 1017 /* do local part */ 1018 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1019 /* send it on its way */ 1020 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1021 /* values actually were received in the Begin() but we need to call this nop */ 1022 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1023 } 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1029 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1030 { 1031 MPI_Comm comm; 1032 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1033 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1034 IS Me,Notme; 1035 PetscErrorCode ierr; 1036 PetscInt M,N,first,last,*notme,i; 1037 PetscMPIInt size; 1038 1039 PetscFunctionBegin; 1040 /* Easy test: symmetric diagonal block */ 1041 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1042 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1043 if (!*f) PetscFunctionReturn(0); 1044 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1045 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1046 if (size == 1) PetscFunctionReturn(0); 1047 1048 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
*/ 1049 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1050 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1051 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1052 for (i=0; i<first; i++) notme[i] = i; 1053 for (i=last; i<M; i++) notme[i-last+first] = i; 1054 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1055 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1056 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1057 Aoff = Aoffs[0]; 1058 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1059 Boff = Boffs[0]; 1060 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1061 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1062 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1063 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1064 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1065 ierr = PetscFree(notme);CHKERRQ(ierr); 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1072 { 1073 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1074 PetscErrorCode ierr; 1075 1076 PetscFunctionBegin; 1077 /* do nondiagonal part */ 1078 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1079 /* send it on its way */ 1080 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1081 /* do local part */ 1082 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1083 /* receive remote parts */ 1084 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 /* 1089 This only works correctly for square matrices where the subblock A->A is the 1090 diagonal block 1091 */ 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1095 { 1096 PetscErrorCode ierr; 1097 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1098 1099 PetscFunctionBegin; 1100 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1101 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1102 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatScale_MPIAIJ" 1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1109 { 1110 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1111 PetscErrorCode ierr; 1112 1113 PetscFunctionBegin; 1114 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1115 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatDestroy_MPIAIJ" 1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1122 { 1123 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 #if defined(PETSC_USE_LOG) 1128 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1129 #endif 1130 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1131 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1132 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1133 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1134 #if defined(PETSC_USE_CTABLE) 1135 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1136 #else 1137 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1138 #endif 1139 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1140 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1141 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1142 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1143 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1144 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1145 1146 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1147 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1148 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1149 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1151 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1152 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1153 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1154 #if defined(PETSC_HAVE_ELEMENTAL) 1155 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1156 #endif 1157 #if defined(PETSC_HAVE_HYPRE) 1158 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1159 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1160 #endif 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1166 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1167 { 1168 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1169 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1170 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1171 PetscErrorCode ierr; 1172 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1173 int fd; 1174 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1175 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1176 PetscScalar *column_values; 1177 PetscInt message_count,flowcontrolcount; 1178 FILE *file; 1179 1180 PetscFunctionBegin; 1181 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1182 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1183 nz = A->nz + B->nz; 1184 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1185 if (!rank) { 1186 header[0] = MAT_FILE_CLASSID; 1187 header[1] = mat->rmap->N; 1188 header[2] = mat->cmap->N; 1189 1190 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1191 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1192 /* get largest number of rows any processor has */ 1193 rlen = mat->rmap->n; 1194 range = mat->rmap->range; 1195 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1196 } else { 1197 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1198 rlen = mat->rmap->n; 1199 } 1200 1201 /* load up the local row counts */ 1202 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1203 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1204 1205 /* store the row lengths to the file */ 1206 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1207 if (!rank) { 1208 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1209 for (i=1; i<size; i++) { 1210 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1211 rlen = range[i+1] - range[i]; 1212 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1213 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1214 } 1215 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1216 } else { 1217 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1218 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1219 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1220 } 1221 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1222 1223 /* load up the local column indices */ 1224 nzmax = nz; /* th processor needs space a largest processor needs */ 1225 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1226 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1227 cnt = 0; 1228 for (i=0; i<mat->rmap->n; i++) { 1229 for (j=B->i[i]; j<B->i[i+1]; j++) { 1230 if ((col = garray[B->j[j]]) > cstart) break; 1231 column_indices[cnt++] = col; 1232 } 1233 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1234 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1235 } 1236 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1237 1238 /* store the column indices to the file */ 1239 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1240 if (!rank) { 1241 MPI_Status status; 1242 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1243 for (i=1; i<size; i++) { 1244 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1245 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1246 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1247 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1248 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1249 } 1250 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1251 } else { 1252 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1253 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1254 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1255 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1256 } 1257 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1258 1259 /* load up the local column values */ 1260 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1261 cnt = 0; 1262 for (i=0; i<mat->rmap->n; i++) { 1263 for (j=B->i[i]; j<B->i[i+1]; j++) { 1264 if (garray[B->j[j]] > cstart) break; 1265 column_values[cnt++] = B->a[j]; 1266 } 1267 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1268 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1269 
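/* (Descriptive note) this loop emits each row's values in global column order by interleaving three
   pieces: the off-diagonal entries whose global column precedes the diagonal block, then the
   diagonal-block entries, then the remaining off-diagonal entries; the column indices were gathered
   with the same traversal above, so indices and values stay aligned in the file. */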
} 1270 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1271 1272 /* store the column values to the file */ 1273 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1274 if (!rank) { 1275 MPI_Status status; 1276 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1277 for (i=1; i<size; i++) { 1278 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1279 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1280 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1281 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1282 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1283 } 1284 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1285 } else { 1286 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1287 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1288 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1289 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1290 } 1291 ierr = PetscFree(column_values);CHKERRQ(ierr); 1292 1293 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1294 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 #include <petscdraw.h> 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1301 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1302 { 1303 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1304 PetscErrorCode ierr; 1305 PetscMPIInt rank = aij->rank,size = aij->size; 1306 PetscBool isdraw,iascii,isbinary; 1307 PetscViewer sviewer; 1308 PetscViewerFormat format; 1309 1310 PetscFunctionBegin; 1311 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1312 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1313 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1314 if (iascii) { 1315 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1316 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1317 MatInfo info; 1318 PetscBool inodes; 1319 1320 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1321 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1322 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1323 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1324 if (!inodes) { 1325 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1326 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1327 } else { 1328 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1329 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1330 } 1331 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1332 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1333 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1334 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1335 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1336 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1337 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1338 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1341 PetscInt inodecount,inodelimit,*inodes; 1342 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1343 if (inodes) { 1344 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1345 } else { 1346 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1347 } 1348 PetscFunctionReturn(0); 1349 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1350 PetscFunctionReturn(0); 1351 } 1352 } else if (isbinary) { 1353 if (size == 1) { 1354 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1355 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1356 } else { 1357 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1358 } 1359 PetscFunctionReturn(0); 1360 } else if (isdraw) { 1361 PetscDraw draw; 1362 PetscBool isnull; 1363 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1364 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1365 if (isnull) PetscFunctionReturn(0); 1366 } 1367 1368 { 1369 /* assemble the entire matrix onto first processor. 
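   This is inherently not memory scalable (process 0 is given all the rows and ends up holding a
   copy of the whole matrix), so it is only appropriate for matrices small enough to be printed
   or drawn anyway.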
*/ 1370 Mat A; 1371 Mat_SeqAIJ *Aloc; 1372 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1373 MatScalar *a; 1374 1375 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1376 if (!rank) { 1377 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1378 } else { 1379 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1380 } 1381 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1382 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1383 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1384 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1385 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1386 1387 /* copy over the A part */ 1388 Aloc = (Mat_SeqAIJ*)aij->A->data; 1389 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1390 row = mat->rmap->rstart; 1391 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1392 for (i=0; i<m; i++) { 1393 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1394 row++; 1395 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1396 } 1397 aj = Aloc->j; 1398 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1399 1400 /* copy over the B part */ 1401 Aloc = (Mat_SeqAIJ*)aij->B->data; 1402 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1403 row = mat->rmap->rstart; 1404 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1405 ct = cols; 1406 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1407 for (i=0; i<m; i++) { 1408 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1409 row++; 1410 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1411 } 1412 ierr = PetscFree(ct);CHKERRQ(ierr); 1413 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1414 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1415 /* 1416 Everyone has to call to draw the matrix since the graphics waits are 1417 synchronized across all processors that share the PetscDraw object 1418 */ 1419 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1420 if (!rank) { 1421 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1422 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1423 } 1424 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1425 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1426 ierr = MatDestroy(&A);CHKERRQ(ierr); 1427 } 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #undef __FUNCT__ 1432 #define __FUNCT__ "MatView_MPIAIJ" 1433 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1434 { 1435 PetscErrorCode ierr; 1436 PetscBool iascii,isdraw,issocket,isbinary; 1437 1438 PetscFunctionBegin; 1439 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1440 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1441 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1442 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1443 if (iascii || isdraw || isbinary || issocket) { 1444 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1445 } 1446 PetscFunctionReturn(0); 1447 } 1448 1449 #undef __FUNCT__ 1450 #define __FUNCT__ "MatSOR_MPIAIJ" 1451 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 
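/* (Descriptive note, ahead of the implementation) the "local" SOR variants handled below are
   block Jacobi in the parallel direction: on each outer iteration the current ghost values of xx
   are gathered into lvec, the right-hand side is corrected as bb1 = bb - B*lvec (implemented by
   negating lvec and calling the multadd of B), and a purely sequential SOR sweep is then applied
   to the diagonal block A with bb1. */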
1452 { 1453 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1454 PetscErrorCode ierr; 1455 Vec bb1 = 0; 1456 PetscBool hasop; 1457 1458 PetscFunctionBegin; 1459 if (flag == SOR_APPLY_UPPER) { 1460 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1461 PetscFunctionReturn(0); 1462 } 1463 1464 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1465 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1466 } 1467 1468 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1469 if (flag & SOR_ZERO_INITIAL_GUESS) { 1470 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1471 its--; 1472 } 1473 1474 while (its--) { 1475 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1476 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1477 1478 /* update rhs: bb1 = bb - B*x */ 1479 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1480 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1481 1482 /* local sweep */ 1483 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1484 } 1485 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1486 if (flag & SOR_ZERO_INITIAL_GUESS) { 1487 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1488 its--; 1489 } 1490 while (its--) { 1491 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1492 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1493 1494 /* update rhs: bb1 = bb - B*x */ 1495 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1496 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1497 1498 /* local sweep */ 1499 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1500 } 1501 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1502 if (flag & SOR_ZERO_INITIAL_GUESS) { 1503 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1504 its--; 1505 } 1506 while (its--) { 1507 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1508 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1509 1510 /* update rhs: bb1 = bb - B*x */ 1511 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1512 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1513 1514 /* local sweep */ 1515 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1516 } 1517 } else if (flag & SOR_EISENSTAT) { 1518 Vec xx1; 1519 1520 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1521 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1522 1523 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1524 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1525 if (!mat->diag) { 1526 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1527 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1528 } 1529 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1530 if (hasop) { 1531 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1532 } else { 1533 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1534 } 1535 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 
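  /*
     Descriptive note (added): at this point in the SOR_EISENSTAT branch, xx holds the result of the
     zero-initial-guess local backward half-sweep and bb1 = bb + ((omega-2)/omega)*D*xx, where D is either
     the point diagonal of the matrix (mat->diag) or its diagonal block, depending on whether
     MatMultDiagonalBlock() is available. The MatMultAdd() below folds in the off-process contribution
     B*lvec (the scattered ghost values of xx), after which bb1 serves as the right-hand side of the
     zero-initial-guess local forward half-sweep; its result xx1 is then added to xx to complete one
     Eisenstat SOR application.
  */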
1536 1537 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1538 1539 /* local sweep */ 1540 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1541 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1542 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1543 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1544 1545 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1546 1547 matin->factorerrortype = mat->A->factorerrortype; 1548 PetscFunctionReturn(0); 1549 } 1550 1551 #undef __FUNCT__ 1552 #define __FUNCT__ "MatPermute_MPIAIJ" 1553 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1554 { 1555 Mat aA,aB,Aperm; 1556 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1557 PetscScalar *aa,*ba; 1558 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1559 PetscSF rowsf,sf; 1560 IS parcolp = NULL; 1561 PetscBool done; 1562 PetscErrorCode ierr; 1563 1564 PetscFunctionBegin; 1565 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1566 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1567 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1568 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1569 1570 /* Invert row permutation to find out where my rows should go */ 1571 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1572 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1573 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1574 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1575 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1576 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1577 1578 /* Invert column permutation to find out where my columns should go */ 1579 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1580 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1581 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1582 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1583 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1584 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1585 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1586 1587 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1588 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1589 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1590 1591 /* Find out where my gcols should go */ 1592 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1593 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1594 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1595 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1596 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1597 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1598 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1599 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1600 1601 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1602 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1603 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1604 for (i=0; i<m; i++) { 1605 PetscInt row = rdest[i],rowner; 1606 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1607 for 
(j=ai[i]; j<ai[i+1]; j++) { 1608 PetscInt cowner,col = cdest[aj[j]]; 1609 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1610 if (rowner == cowner) dnnz[i]++; 1611 else onnz[i]++; 1612 } 1613 for (j=bi[i]; j<bi[i+1]; j++) { 1614 PetscInt cowner,col = gcdest[bj[j]]; 1615 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1616 if (rowner == cowner) dnnz[i]++; 1617 else onnz[i]++; 1618 } 1619 } 1620 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1621 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1622 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1623 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1624 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1625 1626 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1627 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1628 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1629 for (i=0; i<m; i++) { 1630 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1631 PetscInt j0,rowlen; 1632 rowlen = ai[i+1] - ai[i]; 1633 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1634 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1635 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1636 } 1637 rowlen = bi[i+1] - bi[i]; 1638 for (j0=j=0; j<rowlen; j0=j) { 1639 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1640 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1641 } 1642 } 1643 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1644 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1645 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1646 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1647 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1648 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1649 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1650 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1651 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1652 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1653 *B = Aperm; 1654 PetscFunctionReturn(0); 1655 } 1656 1657 #undef __FUNCT__ 1658 #define __FUNCT__ "MatGetGhosts_MPIAIJ" 1659 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1660 { 1661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1662 PetscErrorCode ierr; 1663 1664 PetscFunctionBegin; 1665 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1666 if (ghosts) *ghosts = aij->garray; 1667 PetscFunctionReturn(0); 1668 } 1669 1670 #undef __FUNCT__ 1671 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1672 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1673 { 1674 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1675 Mat A = mat->A,B = mat->B; 1676 PetscErrorCode ierr; 1677 PetscReal isend[5],irecv[5]; 1678 1679 PetscFunctionBegin; 1680 info->block_size = 1.0; 1681 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1682 1683 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1684 isend[3] = info->memory; isend[4] = info->mallocs; 1685 1686 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1687 1688 isend[0] += 
info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1689 isend[3] += info->memory; isend[4] += info->mallocs; 1690 if (flag == MAT_LOCAL) { 1691 info->nz_used = isend[0]; 1692 info->nz_allocated = isend[1]; 1693 info->nz_unneeded = isend[2]; 1694 info->memory = isend[3]; 1695 info->mallocs = isend[4]; 1696 } else if (flag == MAT_GLOBAL_MAX) { 1697 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1698 1699 info->nz_used = irecv[0]; 1700 info->nz_allocated = irecv[1]; 1701 info->nz_unneeded = irecv[2]; 1702 info->memory = irecv[3]; 1703 info->mallocs = irecv[4]; 1704 } else if (flag == MAT_GLOBAL_SUM) { 1705 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1706 1707 info->nz_used = irecv[0]; 1708 info->nz_allocated = irecv[1]; 1709 info->nz_unneeded = irecv[2]; 1710 info->memory = irecv[3]; 1711 info->mallocs = irecv[4]; 1712 } 1713 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1714 info->fill_ratio_needed = 0; 1715 info->factor_mallocs = 0; 1716 PetscFunctionReturn(0); 1717 } 1718 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "MatSetOption_MPIAIJ" 1721 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1722 { 1723 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1724 PetscErrorCode ierr; 1725 1726 PetscFunctionBegin; 1727 switch (op) { 1728 case MAT_NEW_NONZERO_LOCATIONS: 1729 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1730 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1731 case MAT_KEEP_NONZERO_PATTERN: 1732 case MAT_NEW_NONZERO_LOCATION_ERR: 1733 case MAT_USE_INODES: 1734 case MAT_IGNORE_ZERO_ENTRIES: 1735 MatCheckPreallocated(A,1); 1736 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1737 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1738 break; 1739 case MAT_ROW_ORIENTED: 1740 MatCheckPreallocated(A,1); 1741 a->roworiented = flg; 1742 1743 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1744 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1745 break; 1746 case MAT_NEW_DIAGONALS: 1747 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1748 break; 1749 case MAT_IGNORE_OFF_PROC_ENTRIES: 1750 a->donotstash = flg; 1751 break; 1752 case MAT_SPD: 1753 A->spd_set = PETSC_TRUE; 1754 A->spd = flg; 1755 if (flg) { 1756 A->symmetric = PETSC_TRUE; 1757 A->structurally_symmetric = PETSC_TRUE; 1758 A->symmetric_set = PETSC_TRUE; 1759 A->structurally_symmetric_set = PETSC_TRUE; 1760 } 1761 break; 1762 case MAT_SYMMETRIC: 1763 MatCheckPreallocated(A,1); 1764 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1765 break; 1766 case MAT_STRUCTURALLY_SYMMETRIC: 1767 MatCheckPreallocated(A,1); 1768 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1769 break; 1770 case MAT_HERMITIAN: 1771 MatCheckPreallocated(A,1); 1772 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1773 break; 1774 case MAT_SYMMETRY_ETERNAL: 1775 MatCheckPreallocated(A,1); 1776 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1777 break; 1778 case MAT_SUBMAT_SINGLEIS: 1779 A->submat_singleis = flg; 1780 break; 1781 default: 1782 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1783 } 1784 PetscFunctionReturn(0); 1785 } 1786 1787 #undef __FUNCT__ 1788 #define __FUNCT__ "MatGetRow_MPIAIJ" 1789 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1790 { 1791 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1792 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1793 PetscErrorCode ierr; 1794 PetscInt 
i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1795 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1796 PetscInt *cmap,*idx_p; 1797 1798 PetscFunctionBegin; 1799 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1800 mat->getrowactive = PETSC_TRUE; 1801 1802 if (!mat->rowvalues && (idx || v)) { 1803 /* 1804 allocate enough space to hold information from the longest row. 1805 */ 1806 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1807 PetscInt max = 1,tmp; 1808 for (i=0; i<matin->rmap->n; i++) { 1809 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1810 if (max < tmp) max = tmp; 1811 } 1812 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1813 } 1814 1815 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1816 lrow = row - rstart; 1817 1818 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1819 if (!v) {pvA = 0; pvB = 0;} 1820 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1821 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1822 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1823 nztot = nzA + nzB; 1824 1825 cmap = mat->garray; 1826 if (v || idx) { 1827 if (nztot) { 1828 /* Sort by increasing column numbers, assuming A and B already sorted */ 1829 PetscInt imark = -1; 1830 if (v) { 1831 *v = v_p = mat->rowvalues; 1832 for (i=0; i<nzB; i++) { 1833 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1834 else break; 1835 } 1836 imark = i; 1837 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1838 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1839 } 1840 if (idx) { 1841 *idx = idx_p = mat->rowindices; 1842 if (imark > -1) { 1843 for (i=0; i<imark; i++) { 1844 idx_p[i] = cmap[cworkB[i]]; 1845 } 1846 } else { 1847 for (i=0; i<nzB; i++) { 1848 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1849 else break; 1850 } 1851 imark = i; 1852 } 1853 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1854 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1855 } 1856 } else { 1857 if (idx) *idx = 0; 1858 if (v) *v = 0; 1859 } 1860 } 1861 *nz = nztot; 1862 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1863 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1864 PetscFunctionReturn(0); 1865 } 1866 1867 #undef __FUNCT__ 1868 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1869 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1870 { 1871 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1872 1873 PetscFunctionBegin; 1874 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1875 aij->getrowactive = PETSC_FALSE; 1876 PetscFunctionReturn(0); 1877 } 1878 1879 #undef __FUNCT__ 1880 #define __FUNCT__ "MatNorm_MPIAIJ" 1881 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1882 { 1883 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1884 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1885 PetscErrorCode ierr; 1886 PetscInt i,j,cstart = mat->cmap->rstart; 1887 PetscReal sum = 0.0; 1888 MatScalar *v; 1889 1890 PetscFunctionBegin; 1891 if (aij->size == 1) { 1892 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1893 } else { 1894 if (type == NORM_FROBENIUS) { 1895 v = amat->a; 1896 for (i=0; i<amat->nz; i++) { 1897 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 
1898 } 1899 v = bmat->a; 1900 for (i=0; i<bmat->nz; i++) { 1901 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1902 } 1903 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1904 *norm = PetscSqrtReal(*norm); 1905 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1906 } else if (type == NORM_1) { /* max column norm */ 1907 PetscReal *tmp,*tmp2; 1908 PetscInt *jj,*garray = aij->garray; 1909 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1910 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1911 *norm = 0.0; 1912 v = amat->a; jj = amat->j; 1913 for (j=0; j<amat->nz; j++) { 1914 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1915 } 1916 v = bmat->a; jj = bmat->j; 1917 for (j=0; j<bmat->nz; j++) { 1918 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1919 } 1920 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1921 for (j=0; j<mat->cmap->N; j++) { 1922 if (tmp2[j] > *norm) *norm = tmp2[j]; 1923 } 1924 ierr = PetscFree(tmp);CHKERRQ(ierr); 1925 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1926 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1927 } else if (type == NORM_INFINITY) { /* max row norm */ 1928 PetscReal ntemp = 0.0; 1929 for (j=0; j<aij->A->rmap->n; j++) { 1930 v = amat->a + amat->i[j]; 1931 sum = 0.0; 1932 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1933 sum += PetscAbsScalar(*v); v++; 1934 } 1935 v = bmat->a + bmat->i[j]; 1936 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1937 sum += PetscAbsScalar(*v); v++; 1938 } 1939 if (sum > ntemp) ntemp = sum; 1940 } 1941 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1942 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1943 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1944 } 1945 PetscFunctionReturn(0); 1946 } 1947 1948 #undef __FUNCT__ 1949 #define __FUNCT__ "MatTranspose_MPIAIJ" 1950 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1951 { 1952 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1953 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1954 PetscErrorCode ierr; 1955 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1956 PetscInt cstart = A->cmap->rstart,ncol; 1957 Mat B; 1958 MatScalar *array; 1959 1960 PetscFunctionBegin; 1961 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1962 1963 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1964 ai = Aloc->i; aj = Aloc->j; 1965 bi = Bloc->i; bj = Bloc->j; 1966 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1967 PetscInt *d_nnz,*g_nnz,*o_nnz; 1968 PetscSFNode *oloc; 1969 PETSC_UNUSED PetscSF sf; 1970 1971 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1972 /* compute d_nnz for preallocation */ 1973 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1974 for (i=0; i<ai[ma]; i++) { 1975 d_nnz[aj[i]]++; 1976 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1977 } 1978 /* compute local off-diagonal contributions */ 1979 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1980 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1981 /* map those to global */ 1982 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1983 ierr = 
PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1984 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1985 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1986 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1987 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1988 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1989 1990 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1991 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1992 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1993 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1994 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1995 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1996 } else { 1997 B = *matout; 1998 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1999 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2000 } 2001 2002 /* copy over the A part */ 2003 array = Aloc->a; 2004 row = A->rmap->rstart; 2005 for (i=0; i<ma; i++) { 2006 ncol = ai[i+1]-ai[i]; 2007 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2008 row++; 2009 array += ncol; aj += ncol; 2010 } 2011 aj = Aloc->j; 2012 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2013 2014 /* copy over the B part */ 2015 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2016 array = Bloc->a; 2017 row = A->rmap->rstart; 2018 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2019 cols_tmp = cols; 2020 for (i=0; i<mb; i++) { 2021 ncol = bi[i+1]-bi[i]; 2022 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2023 row++; 2024 array += ncol; cols_tmp += ncol; 2025 } 2026 ierr = PetscFree(cols);CHKERRQ(ierr); 2027 2028 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2029 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2030 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2031 *matout = B; 2032 } else { 2033 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2034 } 2035 PetscFunctionReturn(0); 2036 } 2037 2038 #undef __FUNCT__ 2039 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2040 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2041 { 2042 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2043 Mat a = aij->A,b = aij->B; 2044 PetscErrorCode ierr; 2045 PetscInt s1,s2,s3; 2046 2047 PetscFunctionBegin; 2048 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2049 if (rr) { 2050 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2051 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2052 /* Overlap communication with computation. 
*/ 2053 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2054 } 2055 if (ll) { 2056 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2057 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2058 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2059 } 2060 /* scale the diagonal block */ 2061 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2062 2063 if (rr) { 2064 /* Do a scatter end and then right scale the off-diagonal block */ 2065 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2066 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2067 } 2068 PetscFunctionReturn(0); 2069 } 2070 2071 #undef __FUNCT__ 2072 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2073 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2074 { 2075 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2076 PetscErrorCode ierr; 2077 2078 PetscFunctionBegin; 2079 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2080 PetscFunctionReturn(0); 2081 } 2082 2083 #undef __FUNCT__ 2084 #define __FUNCT__ "MatEqual_MPIAIJ" 2085 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2086 { 2087 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2088 Mat a,b,c,d; 2089 PetscBool flg; 2090 PetscErrorCode ierr; 2091 2092 PetscFunctionBegin; 2093 a = matA->A; b = matA->B; 2094 c = matB->A; d = matB->B; 2095 2096 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2097 if (flg) { 2098 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2099 } 2100 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2101 PetscFunctionReturn(0); 2102 } 2103 2104 #undef __FUNCT__ 2105 #define __FUNCT__ "MatCopy_MPIAIJ" 2106 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2107 { 2108 PetscErrorCode ierr; 2109 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2110 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2111 2112 PetscFunctionBegin; 2113 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2114 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2115 /* because of the column compression in the off-processor part of the matrix a->B, 2116 the number of columns in a->B and b->B may be different, hence we cannot call 2117 the MatCopy() directly on the two parts. If need be, we can provide a more 2118 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2119 then copying the submatrices */ 2120 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2121 } else { 2122 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2123 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2124 } 2125 PetscFunctionReturn(0); 2126 } 2127 2128 #undef __FUNCT__ 2129 #define __FUNCT__ "MatSetUp_MPIAIJ" 2130 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2131 { 2132 PetscErrorCode ierr; 2133 2134 PetscFunctionBegin; 2135 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2136 PetscFunctionReturn(0); 2137 } 2138 2139 /* 2140 Computes the number of nonzeros per row needed for preallocation when X and Y 2141 have different nonzero structure. 
2142 */ 2143 #undef __FUNCT__ 2144 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2145 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2146 { 2147 PetscInt i,j,k,nzx,nzy; 2148 2149 PetscFunctionBegin; 2150 /* Set the number of nonzeros in the new matrix */ 2151 for (i=0; i<m; i++) { 2152 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2153 nzx = xi[i+1] - xi[i]; 2154 nzy = yi[i+1] - yi[i]; 2155 nnz[i] = 0; 2156 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2157 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2158 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2159 nnz[i]++; 2160 } 2161 for (; k<nzy; k++) nnz[i]++; 2162 } 2163 PetscFunctionReturn(0); 2164 } 2165 2166 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2169 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2170 { 2171 PetscErrorCode ierr; 2172 PetscInt m = Y->rmap->N; 2173 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2174 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2175 2176 PetscFunctionBegin; 2177 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2178 PetscFunctionReturn(0); 2179 } 2180 2181 #undef __FUNCT__ 2182 #define __FUNCT__ "MatAXPY_MPIAIJ" 2183 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2184 { 2185 PetscErrorCode ierr; 2186 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2187 PetscBLASInt bnz,one=1; 2188 Mat_SeqAIJ *x,*y; 2189 2190 PetscFunctionBegin; 2191 if (str == SAME_NONZERO_PATTERN) { 2192 PetscScalar alpha = a; 2193 x = (Mat_SeqAIJ*)xx->A->data; 2194 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2195 y = (Mat_SeqAIJ*)yy->A->data; 2196 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2197 x = (Mat_SeqAIJ*)xx->B->data; 2198 y = (Mat_SeqAIJ*)yy->B->data; 2199 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2200 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2201 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2202 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2203 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2204 } else { 2205 Mat B; 2206 PetscInt *nnz_d,*nnz_o; 2207 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2208 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2209 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2210 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2211 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2212 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2213 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2214 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2215 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2216 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2217 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2218 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2219 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2220 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2221 } 2222 
PetscFunctionReturn(0); 2223 } 2224 2225 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2226 2227 #undef __FUNCT__ 2228 #define __FUNCT__ "MatConjugate_MPIAIJ" 2229 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2230 { 2231 #if defined(PETSC_USE_COMPLEX) 2232 PetscErrorCode ierr; 2233 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2234 2235 PetscFunctionBegin; 2236 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2237 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2238 #else 2239 PetscFunctionBegin; 2240 #endif 2241 PetscFunctionReturn(0); 2242 } 2243 2244 #undef __FUNCT__ 2245 #define __FUNCT__ "MatRealPart_MPIAIJ" 2246 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2247 { 2248 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2249 PetscErrorCode ierr; 2250 2251 PetscFunctionBegin; 2252 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2253 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2259 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2260 { 2261 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2262 PetscErrorCode ierr; 2263 2264 PetscFunctionBegin; 2265 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2266 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2267 PetscFunctionReturn(0); 2268 } 2269 2270 #undef __FUNCT__ 2271 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2272 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2273 { 2274 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2275 PetscErrorCode ierr; 2276 PetscInt i,*idxb = 0; 2277 PetscScalar *va,*vb; 2278 Vec vtmp; 2279 2280 PetscFunctionBegin; 2281 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2282 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2283 if (idx) { 2284 for (i=0; i<A->rmap->n; i++) { 2285 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2286 } 2287 } 2288 2289 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2290 if (idx) { 2291 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2292 } 2293 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2294 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2295 2296 for (i=0; i<A->rmap->n; i++) { 2297 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2298 va[i] = vb[i]; 2299 if (idx) idx[i] = a->garray[idxb[i]]; 2300 } 2301 } 2302 2303 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2304 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2305 ierr = PetscFree(idxb);CHKERRQ(ierr); 2306 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2307 PetscFunctionReturn(0); 2308 } 2309 2310 #undef __FUNCT__ 2311 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2312 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2313 { 2314 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2315 PetscErrorCode ierr; 2316 PetscInt i,*idxb = 0; 2317 PetscScalar *va,*vb; 2318 Vec vtmp; 2319 2320 PetscFunctionBegin; 2321 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2322 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2323 if (idx) { 2324 for (i=0; i<A->cmap->n; i++) { 2325 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2326 } 2327 } 2328 2329 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2330 if (idx) { 2331 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2332 } 2333 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2334 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2335 2336 for (i=0; i<A->rmap->n; i++) { 2337 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2338 va[i] = vb[i]; 2339 if (idx) idx[i] = a->garray[idxb[i]]; 2340 } 2341 } 2342 2343 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2344 ierr = 
VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2345 ierr = PetscFree(idxb);CHKERRQ(ierr); 2346 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2347 PetscFunctionReturn(0); 2348 } 2349 2350 #undef __FUNCT__ 2351 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2352 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2353 { 2354 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2355 PetscInt n = A->rmap->n; 2356 PetscInt cstart = A->cmap->rstart; 2357 PetscInt *cmap = mat->garray; 2358 PetscInt *diagIdx, *offdiagIdx; 2359 Vec diagV, offdiagV; 2360 PetscScalar *a, *diagA, *offdiagA; 2361 PetscInt r; 2362 PetscErrorCode ierr; 2363 2364 PetscFunctionBegin; 2365 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2366 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2367 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2368 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2369 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2370 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2371 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2372 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2373 for (r = 0; r < n; ++r) { 2374 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2375 a[r] = diagA[r]; 2376 idx[r] = cstart + diagIdx[r]; 2377 } else { 2378 a[r] = offdiagA[r]; 2379 idx[r] = cmap[offdiagIdx[r]]; 2380 } 2381 } 2382 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2383 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2384 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2385 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2386 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2387 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2388 PetscFunctionReturn(0); 2389 } 2390 2391 #undef __FUNCT__ 2392 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2393 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2394 { 2395 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2396 PetscInt n = A->rmap->n; 2397 PetscInt cstart = A->cmap->rstart; 2398 PetscInt *cmap = mat->garray; 2399 PetscInt *diagIdx, *offdiagIdx; 2400 Vec diagV, offdiagV; 2401 PetscScalar *a, *diagA, *offdiagA; 2402 PetscInt r; 2403 PetscErrorCode ierr; 2404 2405 PetscFunctionBegin; 2406 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2407 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2408 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2409 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2410 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2411 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2412 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2413 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2414 for (r = 0; r < n; ++r) { 2415 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2416 a[r] = diagA[r]; 2417 idx[r] = cstart + diagIdx[r]; 2418 } else { 2419 a[r] = offdiagA[r]; 2420 idx[r] = cmap[offdiagIdx[r]]; 2421 } 2422 } 2423 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2424 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2425 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2426 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2427 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2428 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2429 PetscFunctionReturn(0); 2430 } 2431 2432 #undef __FUNCT__ 2433 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2434 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2435 { 2436 PetscErrorCode ierr; 
2437 Mat *dummy; 2438 2439 PetscFunctionBegin; 2440 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2441 *newmat = *dummy; 2442 ierr = PetscFree(dummy);CHKERRQ(ierr); 2443 PetscFunctionReturn(0); 2444 } 2445 2446 #undef __FUNCT__ 2447 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2448 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2449 { 2450 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2451 PetscErrorCode ierr; 2452 2453 PetscFunctionBegin; 2454 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2455 A->factorerrortype = a->A->factorerrortype; 2456 PetscFunctionReturn(0); 2457 } 2458 2459 #undef __FUNCT__ 2460 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2461 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2462 { 2463 PetscErrorCode ierr; 2464 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2465 2466 PetscFunctionBegin; 2467 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2468 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2469 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2470 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2471 PetscFunctionReturn(0); 2472 } 2473 2474 #undef __FUNCT__ 2475 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ" 2476 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2477 { 2478 PetscFunctionBegin; 2479 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2480 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2481 PetscFunctionReturn(0); 2482 } 2483 2484 #undef __FUNCT__ 2485 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap" 2486 /*@ 2487 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2488 2489 Collective on Mat 2490 2491 Input Parameters: 2492 + A - the matrix 2493 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2494 2495 Level: advanced 2496 2497 @*/ 2498 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2499 { 2500 PetscErrorCode ierr; 2501 2502 PetscFunctionBegin; 2503 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2504 PetscFunctionReturn(0); 2505 } 2506 2507 #undef __FUNCT__ 2508 #define __FUNCT__ "MatSetFromOptions_MPIAIJ" 2509 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2510 { 2511 PetscErrorCode ierr; 2512 PetscBool sc = PETSC_FALSE,flg; 2513 2514 PetscFunctionBegin; 2515 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2516 ierr = PetscObjectOptionsBegin((PetscObject)A); 2517 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2518 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2519 if (flg) { 2520 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2521 } 2522 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "MatShift_MPIAIJ" 2528 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2529 { 2530 PetscErrorCode ierr; 2531 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2532 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2533 2534 PetscFunctionBegin; 2535 if (!Y->preallocated) { 2536 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2537 } else if 
(!aij->nz) { 2538 PetscInt nonew = aij->nonew; 2539 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2540 aij->nonew = nonew; 2541 } 2542 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2543 PetscFunctionReturn(0); 2544 } 2545 2546 #undef __FUNCT__ 2547 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ" 2548 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2549 { 2550 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2551 PetscErrorCode ierr; 2552 2553 PetscFunctionBegin; 2554 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2555 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2556 if (d) { 2557 PetscInt rstart; 2558 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2559 *d += rstart; 2560 2561 } 2562 PetscFunctionReturn(0); 2563 } 2564 2565 2566 /* -------------------------------------------------------------------*/ 2567 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2568 MatGetRow_MPIAIJ, 2569 MatRestoreRow_MPIAIJ, 2570 MatMult_MPIAIJ, 2571 /* 4*/ MatMultAdd_MPIAIJ, 2572 MatMultTranspose_MPIAIJ, 2573 MatMultTransposeAdd_MPIAIJ, 2574 0, 2575 0, 2576 0, 2577 /*10*/ 0, 2578 0, 2579 0, 2580 MatSOR_MPIAIJ, 2581 MatTranspose_MPIAIJ, 2582 /*15*/ MatGetInfo_MPIAIJ, 2583 MatEqual_MPIAIJ, 2584 MatGetDiagonal_MPIAIJ, 2585 MatDiagonalScale_MPIAIJ, 2586 MatNorm_MPIAIJ, 2587 /*20*/ MatAssemblyBegin_MPIAIJ, 2588 MatAssemblyEnd_MPIAIJ, 2589 MatSetOption_MPIAIJ, 2590 MatZeroEntries_MPIAIJ, 2591 /*24*/ MatZeroRows_MPIAIJ, 2592 0, 2593 0, 2594 0, 2595 0, 2596 /*29*/ MatSetUp_MPIAIJ, 2597 0, 2598 0, 2599 MatGetDiagonalBlock_MPIAIJ, 2600 0, 2601 /*34*/ MatDuplicate_MPIAIJ, 2602 0, 2603 0, 2604 0, 2605 0, 2606 /*39*/ MatAXPY_MPIAIJ, 2607 MatGetSubMatrices_MPIAIJ, 2608 MatIncreaseOverlap_MPIAIJ, 2609 MatGetValues_MPIAIJ, 2610 MatCopy_MPIAIJ, 2611 /*44*/ MatGetRowMax_MPIAIJ, 2612 MatScale_MPIAIJ, 2613 MatShift_MPIAIJ, 2614 MatDiagonalSet_MPIAIJ, 2615 MatZeroRowsColumns_MPIAIJ, 2616 /*49*/ MatSetRandom_MPIAIJ, 2617 0, 2618 0, 2619 0, 2620 0, 2621 /*54*/ MatFDColoringCreate_MPIXAIJ, 2622 0, 2623 MatSetUnfactored_MPIAIJ, 2624 MatPermute_MPIAIJ, 2625 0, 2626 /*59*/ MatGetSubMatrix_MPIAIJ, 2627 MatDestroy_MPIAIJ, 2628 MatView_MPIAIJ, 2629 0, 2630 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2631 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2632 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2633 0, 2634 0, 2635 0, 2636 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2637 MatGetRowMinAbs_MPIAIJ, 2638 0, 2639 0, 2640 0, 2641 0, 2642 /*75*/ MatFDColoringApply_AIJ, 2643 MatSetFromOptions_MPIAIJ, 2644 0, 2645 0, 2646 MatFindZeroDiagonals_MPIAIJ, 2647 /*80*/ 0, 2648 0, 2649 0, 2650 /*83*/ MatLoad_MPIAIJ, 2651 0, 2652 0, 2653 0, 2654 0, 2655 0, 2656 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2657 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2658 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2659 MatPtAP_MPIAIJ_MPIAIJ, 2660 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2661 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2662 0, 2663 0, 2664 0, 2665 0, 2666 /*99*/ 0, 2667 0, 2668 0, 2669 MatConjugate_MPIAIJ, 2670 0, 2671 /*104*/MatSetValuesRow_MPIAIJ, 2672 MatRealPart_MPIAIJ, 2673 MatImaginaryPart_MPIAIJ, 2674 0, 2675 0, 2676 /*109*/0, 2677 0, 2678 MatGetRowMin_MPIAIJ, 2679 0, 2680 MatMissingDiagonal_MPIAIJ, 2681 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2682 0, 2683 MatGetGhosts_MPIAIJ, 2684 0, 2685 0, 2686 /*119*/0, 2687 0, 2688 0, 2689 0, 2690 MatGetMultiProcBlock_MPIAIJ, 2691 /*124*/MatFindNonzeroRows_MPIAIJ, 2692 MatGetColumnNorms_MPIAIJ, 2693 MatInvertBlockDiagonal_MPIAIJ, 2694 0, 2695 
MatGetSubMatricesMPI_MPIAIJ, 2696 /*129*/0, 2697 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2698 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2699 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2700 0, 2701 /*134*/0, 2702 0, 2703 0, 2704 0, 2705 0, 2706 /*139*/0, 2707 0, 2708 0, 2709 MatFDColoringSetUp_MPIXAIJ, 2710 MatFindOffBlockDiagonalEntries_MPIAIJ, 2711 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2712 }; 2713 2714 /* ----------------------------------------------------------------------------------------*/ 2715 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2718 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2719 { 2720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2721 PetscErrorCode ierr; 2722 2723 PetscFunctionBegin; 2724 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2725 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2726 PetscFunctionReturn(0); 2727 } 2728 2729 #undef __FUNCT__ 2730 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2731 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2732 { 2733 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2734 PetscErrorCode ierr; 2735 2736 PetscFunctionBegin; 2737 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2738 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2739 PetscFunctionReturn(0); 2740 } 2741 2742 #undef __FUNCT__ 2743 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2744 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2745 { 2746 Mat_MPIAIJ *b; 2747 PetscErrorCode ierr; 2748 2749 PetscFunctionBegin; 2750 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2751 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2752 b = (Mat_MPIAIJ*)B->data; 2753 2754 #if defined(PETSC_USE_CTABLE) 2755 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2756 #else 2757 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2758 #endif 2759 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2760 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2761 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2762 2763 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2764 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2765 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2766 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2767 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2768 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2769 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2770 2771 if (!B->preallocated) { 2772 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2773 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2774 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2775 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2776 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2777 } 2778 2779 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2780 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2781 B->preallocated = PETSC_TRUE; 2782 B->was_assembled = PETSC_FALSE; 2783 B->assembled = PETSC_FALSE;; 2784 PetscFunctionReturn(0); 2785 } 2786 2787 #undef __FUNCT__ 2788 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2789 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2790 { 2791 Mat mat; 2792 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2793 PetscErrorCode ierr; 2794 2795 PetscFunctionBegin; 2796 *newmat = 0; 2797 ierr = 
MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2798 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2799 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2800 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2801 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2802 a = (Mat_MPIAIJ*)mat->data; 2803 2804 mat->factortype = matin->factortype; 2805 mat->assembled = PETSC_TRUE; 2806 mat->insertmode = NOT_SET_VALUES; 2807 mat->preallocated = PETSC_TRUE; 2808 2809 a->size = oldmat->size; 2810 a->rank = oldmat->rank; 2811 a->donotstash = oldmat->donotstash; 2812 a->roworiented = oldmat->roworiented; 2813 a->rowindices = 0; 2814 a->rowvalues = 0; 2815 a->getrowactive = PETSC_FALSE; 2816 2817 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2818 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2819 2820 if (oldmat->colmap) { 2821 #if defined(PETSC_USE_CTABLE) 2822 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2823 #else 2824 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2825 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2826 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2827 #endif 2828 } else a->colmap = 0; 2829 if (oldmat->garray) { 2830 PetscInt len; 2831 len = oldmat->B->cmap->n; 2832 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2833 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2834 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2835 } else a->garray = 0; 2836 2837 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2838 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2839 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2840 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2841 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2842 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2843 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2844 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2845 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2846 *newmat = mat; 2847 PetscFunctionReturn(0); 2848 } 2849 2850 2851 2852 #undef __FUNCT__ 2853 #define __FUNCT__ "MatLoad_MPIAIJ" 2854 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2855 { 2856 PetscScalar *vals,*svals; 2857 MPI_Comm comm; 2858 PetscErrorCode ierr; 2859 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2860 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2861 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2862 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2863 PetscInt cend,cstart,n,*rowners; 2864 int fd; 2865 PetscInt bs = newMat->rmap->bs; 2866 2867 PetscFunctionBegin; 2868 /* force binary viewer to load .info file if it has not yet done so */ 2869 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2870 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2871 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2872 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2873 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2874 if (!rank) { 2875 ierr = 
PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2876 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2877 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2878 } 2879 2880 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2881 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2882 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2883 if (bs < 0) bs = 1; 2884 2885 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2886 M = header[1]; N = header[2]; 2887 2888 /* If global sizes are set, check if they are consistent with that given in the file */ 2889 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2890 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2891 2892 /* determine ownership of all (block) rows */ 2893 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2894 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2895 else m = newMat->rmap->n; /* Set by user */ 2896 2897 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2898 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2899 2900 /* First process needs enough room for process with most rows */ 2901 if (!rank) { 2902 mmax = rowners[1]; 2903 for (i=2; i<=size; i++) { 2904 mmax = PetscMax(mmax, rowners[i]); 2905 } 2906 } else mmax = -1; /* unused, but compilers complain */ 2907 2908 rowners[0] = 0; 2909 for (i=2; i<=size; i++) { 2910 rowners[i] += rowners[i-1]; 2911 } 2912 rstart = rowners[rank]; 2913 rend = rowners[rank+1]; 2914 2915 /* distribute row lengths to all processors */ 2916 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2917 if (!rank) { 2918 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2919 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2920 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2921 for (j=0; j<m; j++) { 2922 procsnz[0] += ourlens[j]; 2923 } 2924 for (i=1; i<size; i++) { 2925 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2926 /* calculate the number of nonzeros on each processor */ 2927 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2928 procsnz[i] += rowlengths[j]; 2929 } 2930 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2931 } 2932 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2933 } else { 2934 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2935 } 2936 2937 if (!rank) { 2938 /* determine max buffer needed and allocate it */ 2939 maxnz = 0; 2940 for (i=0; i<size; i++) { 2941 maxnz = PetscMax(maxnz,procsnz[i]); 2942 } 2943 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2944 2945 /* read in my part of the matrix column indices */ 2946 nz = procsnz[0]; 2947 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2948 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2949 2950 /* read in every one elses and ship off */ 2951 for (i=1; i<size; i++) { 
2952 nz = procsnz[i]; 2953 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2954 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2955 } 2956 ierr = PetscFree(cols);CHKERRQ(ierr); 2957 } else { 2958 /* determine buffer space needed for message */ 2959 nz = 0; 2960 for (i=0; i<m; i++) { 2961 nz += ourlens[i]; 2962 } 2963 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2964 2965 /* receive message of column indices*/ 2966 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2967 } 2968 2969 /* determine column ownership if matrix is not square */ 2970 if (N != M) { 2971 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2972 else n = newMat->cmap->n; 2973 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2974 cstart = cend - n; 2975 } else { 2976 cstart = rstart; 2977 cend = rend; 2978 n = cend - cstart; 2979 } 2980 2981 /* loop over local rows, determining number of off diagonal entries */ 2982 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2983 jj = 0; 2984 for (i=0; i<m; i++) { 2985 for (j=0; j<ourlens[i]; j++) { 2986 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2987 jj++; 2988 } 2989 } 2990 2991 for (i=0; i<m; i++) { 2992 ourlens[i] -= offlens[i]; 2993 } 2994 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2995 2996 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2997 2998 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2999 3000 for (i=0; i<m; i++) { 3001 ourlens[i] += offlens[i]; 3002 } 3003 3004 if (!rank) { 3005 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3006 3007 /* read in my part of the matrix numerical values */ 3008 nz = procsnz[0]; 3009 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3010 3011 /* insert into matrix */ 3012 jj = rstart; 3013 smycols = mycols; 3014 svals = vals; 3015 for (i=0; i<m; i++) { 3016 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3017 smycols += ourlens[i]; 3018 svals += ourlens[i]; 3019 jj++; 3020 } 3021 3022 /* read in other processors and ship out */ 3023 for (i=1; i<size; i++) { 3024 nz = procsnz[i]; 3025 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3026 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3027 } 3028 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3029 } else { 3030 /* receive numeric values */ 3031 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3032 3033 /* receive message of values*/ 3034 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3035 3036 /* insert into matrix */ 3037 jj = rstart; 3038 smycols = mycols; 3039 svals = vals; 3040 for (i=0; i<m; i++) { 3041 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3042 smycols += ourlens[i]; 3043 svals += ourlens[i]; 3044 jj++; 3045 } 3046 } 3047 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3048 ierr = PetscFree(vals);CHKERRQ(ierr); 3049 ierr = PetscFree(mycols);CHKERRQ(ierr); 3050 ierr = PetscFree(rowners);CHKERRQ(ierr); 3051 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3052 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3053 PetscFunctionReturn(0); 3054 } 3055 3056 #undef __FUNCT__ 3057 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3058 /* TODO: Not scalable because of ISAllGather() unless getting all columns. 
*/ 3059 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3060 { 3061 PetscErrorCode ierr; 3062 IS iscol_local; 3063 PetscInt csize; 3064 3065 PetscFunctionBegin; 3066 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3067 if (call == MAT_REUSE_MATRIX) { 3068 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3069 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3070 } else { 3071 /* check if we are grabbing all columns*/ 3072 PetscBool isstride; 3073 PetscMPIInt lisstride = 0,gisstride; 3074 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3075 if (isstride) { 3076 PetscInt start,len,mstart,mlen; 3077 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3078 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3079 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3080 if (mstart == start && mlen-mstart == len) lisstride = 1; 3081 } 3082 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3083 if (gisstride) { 3084 PetscInt N; 3085 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3086 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3087 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3088 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3089 } else { 3090 PetscInt cbs; 3091 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3092 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3093 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3094 } 3095 } 3096 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3097 if (call == MAT_INITIAL_MATRIX) { 3098 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3099 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3100 } 3101 PetscFunctionReturn(0); 3102 } 3103 3104 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3105 #undef __FUNCT__ 3106 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3107 /* 3108 Not great since it makes two copies of the submatrix, first an SeqAIJ 3109 in local and then by concatenating the local matrices the end result. 3110 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3111 3112 Note: This requires a sequential iscol with all indices. 
3113 */ 3114 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3115 { 3116 PetscErrorCode ierr; 3117 PetscMPIInt rank,size; 3118 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3119 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3120 PetscBool allcolumns, colflag; 3121 Mat M,Mreuse; 3122 MatScalar *vwork,*aa; 3123 MPI_Comm comm; 3124 Mat_SeqAIJ *aij; 3125 3126 PetscFunctionBegin; 3127 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3128 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3129 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3130 3131 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3132 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3133 if (colflag && ncol == mat->cmap->N) { 3134 allcolumns = PETSC_TRUE; 3135 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3136 } else { 3137 allcolumns = PETSC_FALSE; 3138 } 3139 if (call == MAT_REUSE_MATRIX) { 3140 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3141 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3142 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3143 } else { 3144 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3145 } 3146 3147 /* 3148 m - number of local rows 3149 n - number of columns (same on all processors) 3150 rstart - first row in new global matrix generated 3151 */ 3152 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3153 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3154 if (call == MAT_INITIAL_MATRIX) { 3155 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3156 ii = aij->i; 3157 jj = aij->j; 3158 3159 /* 3160 Determine the number of non-zeros in the diagonal and off-diagonal 3161 portions of the matrix in order to do correct preallocation 3162 */ 3163 3164 /* first get start and end of "diagonal" columns */ 3165 if (csize == PETSC_DECIDE) { 3166 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3167 if (mglobal == n) { /* square matrix */ 3168 nlocal = m; 3169 } else { 3170 nlocal = n/size + ((n % size) > rank); 3171 } 3172 } else { 3173 nlocal = csize; 3174 } 3175 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3176 rstart = rend - nlocal; 3177 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3178 3179 /* next, compute all the lengths */ 3180 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3181 olens = dlens + m; 3182 for (i=0; i<m; i++) { 3183 jend = ii[i+1] - ii[i]; 3184 olen = 0; 3185 dlen = 0; 3186 for (j=0; j<jend; j++) { 3187 if (*jj < rstart || *jj >= rend) olen++; 3188 else dlen++; 3189 jj++; 3190 } 3191 olens[i] = olen; 3192 dlens[i] = dlen; 3193 } 3194 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3195 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3196 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3197 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3198 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3199 ierr = PetscFree(dlens);CHKERRQ(ierr); 3200 } else { 3201 PetscInt ml,nl; 3202 3203 M = *newmat; 3204 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3205 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same 
size/layout as request");
  ierr = MatZeroEntries(M);CHKERRQ(ierr);
  /*
     The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
     rather than the slower MatSetValues().
  */
  M->was_assembled = PETSC_TRUE;
  M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3294 } 3295 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3296 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3297 3298 if (!v) { 3299 ierr = PetscFree(values);CHKERRQ(ierr); 3300 } 3301 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3302 PetscFunctionReturn(0); 3303 } 3304 3305 #undef __FUNCT__ 3306 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3307 /*@ 3308 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3309 (the default parallel PETSc format). 3310 3311 Collective on MPI_Comm 3312 3313 Input Parameters: 3314 + B - the matrix 3315 . i - the indices into j for the start of each local row (starts with zero) 3316 . j - the column indices for each local row (starts with zero) 3317 - v - optional values in the matrix 3318 3319 Level: developer 3320 3321 Notes: 3322 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3323 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3324 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3325 3326 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3327 3328 The format which is used for the sparse matrix input, is equivalent to a 3329 row-major ordering.. i.e for the following matrix, the input data expected is 3330 as shown 3331 3332 $ 1 0 0 3333 $ 2 0 3 P0 3334 $ ------- 3335 $ 4 5 6 P1 3336 $ 3337 $ Process0 [P0]: rows_owned=[0,1] 3338 $ i = {0,1,3} [size = nrow+1 = 2+1] 3339 $ j = {0,0,2} [size = 3] 3340 $ v = {1,2,3} [size = 3] 3341 $ 3342 $ Process1 [P1]: rows_owned=[2] 3343 $ i = {0,3} [size = nrow+1 = 1+1] 3344 $ j = {0,1,2} [size = 3] 3345 $ v = {4,5,6} [size = 3] 3346 3347 .keywords: matrix, aij, compressed row, sparse, parallel 3348 3349 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3350 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3351 @*/ 3352 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3353 { 3354 PetscErrorCode ierr; 3355 3356 PetscFunctionBegin; 3357 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3358 PetscFunctionReturn(0); 3359 } 3360 3361 #undef __FUNCT__ 3362 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3363 /*@C 3364 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3365 (the default parallel PETSc format). For good matrix assembly performance 3366 the user should preallocate the matrix storage by setting the parameters 3367 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3368 performance can be increased by more than a factor of 50. 3369 3370 Collective on MPI_Comm 3371 3372 Input Parameters: 3373 + B - the matrix 3374 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3375 (same value is used for all local rows) 3376 . d_nnz - array containing the number of nonzeros in the various rows of the 3377 DIAGONAL portion of the local submatrix (possibly different for each row) 3378 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3379 The size of this array is equal to the number of local rows, i.e 'm'. 
3380 For matrices that will be factored, you must leave room for (and set) 3381 the diagonal entry even if it is zero. 3382 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3383 submatrix (same value is used for all local rows). 3384 - o_nnz - array containing the number of nonzeros in the various rows of the 3385 OFF-DIAGONAL portion of the local submatrix (possibly different for 3386 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3387 structure. The size of this array is equal to the number 3388 of local rows, i.e 'm'. 3389 3390 If the *_nnz parameter is given then the *_nz parameter is ignored 3391 3392 The AIJ format (also called the Yale sparse matrix format or 3393 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3394 storage. The stored row and column indices begin with zero. 3395 See Users-Manual: ch_mat for details. 3396 3397 The parallel matrix is partitioned such that the first m0 rows belong to 3398 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3399 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3400 3401 The DIAGONAL portion of the local submatrix of a processor can be defined 3402 as the submatrix which is obtained by extraction the part corresponding to 3403 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3404 first row that belongs to the processor, r2 is the last row belonging to 3405 the this processor, and c1-c2 is range of indices of the local part of a 3406 vector suitable for applying the matrix to. This is an mxn matrix. In the 3407 common case of a square matrix, the row and column ranges are the same and 3408 the DIAGONAL part is also square. The remaining portion of the local 3409 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3410 3411 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3412 3413 You can call MatGetInfo() to get information on how effective the preallocation was; 3414 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3415 You can also run with the option -info and look for messages with the string 3416 malloc in them to see if additional memory allocation was needed. 3417 3418 Example usage: 3419 3420 Consider the following 8x8 matrix with 34 non-zero values, that is 3421 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3422 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3423 as follows: 3424 3425 .vb 3426 1 2 0 | 0 3 0 | 0 4 3427 Proc0 0 5 6 | 7 0 0 | 8 0 3428 9 0 10 | 11 0 0 | 12 0 3429 ------------------------------------- 3430 13 0 14 | 15 16 17 | 0 0 3431 Proc1 0 18 0 | 19 20 21 | 0 0 3432 0 0 0 | 22 23 0 | 24 0 3433 ------------------------------------- 3434 Proc2 25 26 27 | 0 0 28 | 29 0 3435 30 0 0 | 31 32 33 | 0 34 3436 .ve 3437 3438 This can be represented as a collection of submatrices as: 3439 3440 .vb 3441 A B C 3442 D E F 3443 G H I 3444 .ve 3445 3446 Where the submatrices A,B,C are owned by proc0, D,E,F are 3447 owned by proc1, G,H,I are owned by proc2. 3448 3449 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3450 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3451 The 'M','N' parameters are 8,8, and have the same values on all procs. 3452 3453 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3454 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3455 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
   rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
     The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.
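     As a minimal calling sketch (not a complete program), process 0 of the two-process
     example below would pass its local CSR arrays directly; the variable names here are
     illustrative only:
.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;
     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve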
3536 3537 The format which is used for the sparse matrix input, is equivalent to a 3538 row-major ordering.. i.e for the following matrix, the input data expected is 3539 as shown 3540 3541 $ 1 0 0 3542 $ 2 0 3 P0 3543 $ ------- 3544 $ 4 5 6 P1 3545 $ 3546 $ Process0 [P0]: rows_owned=[0,1] 3547 $ i = {0,1,3} [size = nrow+1 = 2+1] 3548 $ j = {0,0,2} [size = 3] 3549 $ v = {1,2,3} [size = 3] 3550 $ 3551 $ Process1 [P1]: rows_owned=[2] 3552 $ i = {0,3} [size = nrow+1 = 1+1] 3553 $ j = {0,1,2} [size = 3] 3554 $ v = {4,5,6} [size = 3] 3555 3556 .keywords: matrix, aij, compressed row, sparse, parallel 3557 3558 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3559 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3560 @*/ 3561 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3562 { 3563 PetscErrorCode ierr; 3564 3565 PetscFunctionBegin; 3566 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3567 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3568 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3569 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3570 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3571 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3572 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3573 PetscFunctionReturn(0); 3574 } 3575 3576 #undef __FUNCT__ 3577 #define __FUNCT__ "MatCreateAIJ" 3578 /*@C 3579 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3580 (the default parallel PETSc format). For good matrix assembly performance 3581 the user should preallocate the matrix storage by setting the parameters 3582 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3583 performance can be increased by more than a factor of 50. 3584 3585 Collective on MPI_Comm 3586 3587 Input Parameters: 3588 + comm - MPI communicator 3589 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3590 This value should be the same as the local size used in creating the 3591 y vector for the matrix-vector product y = Ax. 3592 . n - This value should be the same as the local size used in creating the 3593 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3594 calculated if N is given) For square matrices n is almost always m. 3595 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3596 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3597 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3598 (same value is used for all local rows) 3599 . d_nnz - array containing the number of nonzeros in the various rows of the 3600 DIAGONAL portion of the local submatrix (possibly different for each row) 3601 or NULL, if d_nz is used to specify the nonzero structure. 3602 The size of this array is equal to the number of local rows, i.e 'm'. 3603 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3604 submatrix (same value is used for all local rows). 3605 - o_nnz - array containing the number of nonzeros in the various rows of the 3606 OFF-DIAGONAL portion of the local submatrix (possibly different for 3607 each row) or NULL, if o_nz is used to specify the nonzero 3608 structure. 
The size of this array is equal to the number
       of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MATMPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence the preallocation is perfect.
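
   As a minimal calling sketch (not a complete program) for the example above, proc0 could
   create the matrix by passing its local sizes and per-row counts; the variable names here
   are illustrative only:
.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     Mat      A;
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* each rank then inserts its rows with MatSetValues() and assembles with
        MatAssemblyBegin()/MatAssemblyEnd() */
.ve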
3742 3743 Level: intermediate 3744 3745 .keywords: matrix, aij, compressed row, sparse, parallel 3746 3747 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3748 MATMPIAIJ, MatCreateMPIAIJWithArrays() 3749 @*/ 3750 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3751 { 3752 PetscErrorCode ierr; 3753 PetscMPIInt size; 3754 3755 PetscFunctionBegin; 3756 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3757 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3758 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3759 if (size > 1) { 3760 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3761 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3762 } else { 3763 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3764 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3765 } 3766 PetscFunctionReturn(0); 3767 } 3768 3769 #undef __FUNCT__ 3770 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3771 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3772 { 3773 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3774 PetscBool flg; 3775 PetscErrorCode ierr; 3776 3777 PetscFunctionBegin; 3778 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 3779 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 3780 if (Ad) *Ad = a->A; 3781 if (Ao) *Ao = a->B; 3782 if (colmap) *colmap = a->garray; 3783 PetscFunctionReturn(0); 3784 } 3785 3786 #undef __FUNCT__ 3787 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3788 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3789 { 3790 PetscErrorCode ierr; 3791 PetscInt m,N,i,rstart,nnz,Ii; 3792 PetscInt *indx; 3793 PetscScalar *values; 3794 3795 PetscFunctionBegin; 3796 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3797 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3798 PetscInt *dnz,*onz,sum,bs,cbs; 3799 3800 if (n == PETSC_DECIDE) { 3801 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3802 } 3803 /* Check sum(n) = N */ 3804 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3805 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3806 3807 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3808 rstart -= m; 3809 3810 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3811 for (i=0; i<m; i++) { 3812 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3813 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3814 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3815 } 3816 3817 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3818 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3819 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3820 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3821 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3822 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3823 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3824 } 3825 3826 /* numeric phase */ 3827 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3828 for (i=0; i<m; i++) { 3829 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3830 Ii = i + rstart; 3831 
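    /* Ii is the global row number of local row i in the concatenated parallel matrix */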
ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3832 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3833 } 3834 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3835 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3836 PetscFunctionReturn(0); 3837 } 3838 3839 #undef __FUNCT__ 3840 #define __FUNCT__ "MatFileSplit" 3841 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3842 { 3843 PetscErrorCode ierr; 3844 PetscMPIInt rank; 3845 PetscInt m,N,i,rstart,nnz; 3846 size_t len; 3847 const PetscInt *indx; 3848 PetscViewer out; 3849 char *name; 3850 Mat B; 3851 const PetscScalar *values; 3852 3853 PetscFunctionBegin; 3854 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3855 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3856 /* Should this be the type of the diagonal block of A? */ 3857 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3858 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3859 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3860 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3861 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3862 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3863 for (i=0; i<m; i++) { 3864 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3865 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3866 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3867 } 3868 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3869 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3870 3871 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3872 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3873 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3874 sprintf(name,"%s.%d",outfile,rank); 3875 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3876 ierr = PetscFree(name);CHKERRQ(ierr); 3877 ierr = MatView(B,out);CHKERRQ(ierr); 3878 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3879 ierr = MatDestroy(&B);CHKERRQ(ierr); 3880 PetscFunctionReturn(0); 3881 } 3882 3883 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3884 #undef __FUNCT__ 3885 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3886 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3887 { 3888 PetscErrorCode ierr; 3889 Mat_Merge_SeqsToMPI *merge; 3890 PetscContainer container; 3891 3892 PetscFunctionBegin; 3893 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3894 if (container) { 3895 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3896 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3897 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3898 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3899 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3900 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3901 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3902 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3903 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3904 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3905 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3906 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3907 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3908 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3909 ierr = PetscFree(merge);CHKERRQ(ierr); 3910 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3911 } 3912 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3913 PetscFunctionReturn(0); 3914 } 3915 3916 
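
/*
   The routines that follow merge the sequential (SeqAIJ) matrices owned by each process into a
   single parallel MATMPIAIJ matrix (MatCreateMPIAIJSumSeqAIJSymbolic/Numeric and the user-level
   MatCreateMPIAIJSumSeqAIJ), and extract local pieces of a parallel matrix (MatMPIAIJGetLocalMat,
   MatMPIAIJGetLocalMatCondensed, MatGetBrowsOfAcols).
*/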
#include <../src/mat/utils/freespace.h> 3917 #include <petscbt.h> 3918 3919 #undef __FUNCT__ 3920 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3921 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3922 { 3923 PetscErrorCode ierr; 3924 MPI_Comm comm; 3925 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3926 PetscMPIInt size,rank,taga,*len_s; 3927 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3928 PetscInt proc,m; 3929 PetscInt **buf_ri,**buf_rj; 3930 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3931 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3932 MPI_Request *s_waits,*r_waits; 3933 MPI_Status *status; 3934 MatScalar *aa=a->a; 3935 MatScalar **abuf_r,*ba_i; 3936 Mat_Merge_SeqsToMPI *merge; 3937 PetscContainer container; 3938 3939 PetscFunctionBegin; 3940 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3941 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3942 3943 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3944 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3945 3946 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3947 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3948 3949 bi = merge->bi; 3950 bj = merge->bj; 3951 buf_ri = merge->buf_ri; 3952 buf_rj = merge->buf_rj; 3953 3954 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 3955 owners = merge->rowmap->range; 3956 len_s = merge->len_s; 3957 3958 /* send and recv matrix values */ 3959 /*-----------------------------*/ 3960 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3961 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3962 3963 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 3964 for (proc=0,k=0; proc<size; proc++) { 3965 if (!len_s[proc]) continue; 3966 i = owners[proc]; 3967 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3968 k++; 3969 } 3970 3971 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3972 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3973 ierr = PetscFree(status);CHKERRQ(ierr); 3974 3975 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3976 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3977 3978 /* insert mat values of mpimat */ 3979 /*----------------------------*/ 3980 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 3981 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 3982 3983 for (k=0; k<merge->nrecv; k++) { 3984 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3985 nrows = *(buf_ri_k[k]); 3986 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3987 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 3988 } 3989 3990 /* set values of ba */ 3991 m = merge->rowmap->n; 3992 for (i=0; i<m; i++) { 3993 arow = owners[rank] + i; 3994 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3995 bnzi = bi[i+1] - bi[i]; 3996 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 3997 3998 /* add local non-zero vals of this proc's seqmat into ba */ 3999 anzi = ai[arow+1] - ai[arow]; 4000 aj = a->j + ai[arow]; 4001 aa = a->a + ai[arow]; 4002 nextaj = 0; 4003 for (j=0; nextaj<anzi; j++) { 4004 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4005 ba_i[j] += aa[nextaj++]; 4006 } 4007 } 4008 4009 /* add received 
vals into ba */ 4010 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4011 /* i-th row */ 4012 if (i == *nextrow[k]) { 4013 anzi = *(nextai[k]+1) - *nextai[k]; 4014 aj = buf_rj[k] + *(nextai[k]); 4015 aa = abuf_r[k] + *(nextai[k]); 4016 nextaj = 0; 4017 for (j=0; nextaj<anzi; j++) { 4018 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4019 ba_i[j] += aa[nextaj++]; 4020 } 4021 } 4022 nextrow[k]++; nextai[k]++; 4023 } 4024 } 4025 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4026 } 4027 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4028 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4029 4030 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4031 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4032 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4033 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4034 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4035 PetscFunctionReturn(0); 4036 } 4037 4038 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4039 4040 #undef __FUNCT__ 4041 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4042 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4043 { 4044 PetscErrorCode ierr; 4045 Mat B_mpi; 4046 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4047 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4048 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4049 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4050 PetscInt len,proc,*dnz,*onz,bs,cbs; 4051 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4052 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4053 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4054 MPI_Status *status; 4055 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4056 PetscBT lnkbt; 4057 Mat_Merge_SeqsToMPI *merge; 4058 PetscContainer container; 4059 4060 PetscFunctionBegin; 4061 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4062 4063 /* make sure it is a PETSc comm */ 4064 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4065 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4066 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4067 4068 ierr = PetscNew(&merge);CHKERRQ(ierr); 4069 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4070 4071 /* determine row ownership */ 4072 /*---------------------------------------------------------*/ 4073 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4074 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4075 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4076 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4077 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4078 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4079 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4080 4081 m = merge->rowmap->n; 4082 owners = merge->rowmap->range; 4083 4084 /* determine the number of messages to send, their lengths */ 4085 /*---------------------------------------------------------*/ 4086 len_s = merge->len_s; 4087 4088 len = 0; /* length of buf_si[] */ 4089 merge->nsend = 0; 4090 for (proc=0; proc<size; proc++) { 4091 len_si[proc] = 0; 4092 if (proc == rank) { 4093 len_s[proc] = 0; 4094 } else { 4095 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4096 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4097 } 4098 if (len_s[proc]) { 4099 merge->nsend++; 4100 nrows = 0; 4101 for 
(i=owners[proc]; i<owners[proc+1]; i++) { 4102 if (ai[i+1] > ai[i]) nrows++; 4103 } 4104 len_si[proc] = 2*(nrows+1); 4105 len += len_si[proc]; 4106 } 4107 } 4108 4109 /* determine the number and length of messages to receive for ij-structure */ 4110 /*-------------------------------------------------------------------------*/ 4111 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4112 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4113 4114 /* post the Irecv of j-structure */ 4115 /*-------------------------------*/ 4116 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4117 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4118 4119 /* post the Isend of j-structure */ 4120 /*--------------------------------*/ 4121 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4122 4123 for (proc=0, k=0; proc<size; proc++) { 4124 if (!len_s[proc]) continue; 4125 i = owners[proc]; 4126 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4127 k++; 4128 } 4129 4130 /* receives and sends of j-structure are complete */ 4131 /*------------------------------------------------*/ 4132 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4133 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4134 4135 /* send and recv i-structure */ 4136 /*---------------------------*/ 4137 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4138 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4139 4140 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4141 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4142 for (proc=0,k=0; proc<size; proc++) { 4143 if (!len_s[proc]) continue; 4144 /* form outgoing message for i-structure: 4145 buf_si[0]: nrows to be sent 4146 [1:nrows]: row index (global) 4147 [nrows+1:2*nrows+1]: i-structure index 4148 */ 4149 /*-------------------------------------------*/ 4150 nrows = len_si[proc]/2 - 1; 4151 buf_si_i = buf_si + nrows+1; 4152 buf_si[0] = nrows; 4153 buf_si_i[0] = 0; 4154 nrows = 0; 4155 for (i=owners[proc]; i<owners[proc+1]; i++) { 4156 anzi = ai[i+1] - ai[i]; 4157 if (anzi) { 4158 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4159 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4160 nrows++; 4161 } 4162 } 4163 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4164 k++; 4165 buf_si += len_si[proc]; 4166 } 4167 4168 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4169 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4170 4171 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4172 for (i=0; i<merge->nrecv; i++) { 4173 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4174 } 4175 4176 ierr = PetscFree(len_si);CHKERRQ(ierr); 4177 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4178 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4179 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4180 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4181 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4182 ierr = PetscFree(status);CHKERRQ(ierr); 4183 4184 /* compute a local seq matrix in each processor */ 4185 
/*----------------------------------------------*/ 4186 /* allocate bi array and free space for accumulating nonzero column info */ 4187 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4188 bi[0] = 0; 4189 4190 /* create and initialize a linked list */ 4191 nlnk = N+1; 4192 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4193 4194 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4195 len = ai[owners[rank+1]] - ai[owners[rank]]; 4196 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4197 4198 current_space = free_space; 4199 4200 /* determine symbolic info for each local row */ 4201 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4202 4203 for (k=0; k<merge->nrecv; k++) { 4204 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4205 nrows = *buf_ri_k[k]; 4206 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4207 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4208 } 4209 4210 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4211 len = 0; 4212 for (i=0; i<m; i++) { 4213 bnzi = 0; 4214 /* add local non-zero cols of this proc's seqmat into lnk */ 4215 arow = owners[rank] + i; 4216 anzi = ai[arow+1] - ai[arow]; 4217 aj = a->j + ai[arow]; 4218 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4219 bnzi += nlnk; 4220 /* add received col data into lnk */ 4221 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4222 if (i == *nextrow[k]) { /* i-th row */ 4223 anzi = *(nextai[k]+1) - *nextai[k]; 4224 aj = buf_rj[k] + *nextai[k]; 4225 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4226 bnzi += nlnk; 4227 nextrow[k]++; nextai[k]++; 4228 } 4229 } 4230 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4231 4232 /* if free space is not available, make more free space */ 4233 if (current_space->local_remaining<bnzi) { 4234 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4235 nspacedouble++; 4236 } 4237 /* copy data into free space, then initialize lnk */ 4238 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4239 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4240 4241 current_space->array += bnzi; 4242 current_space->local_used += bnzi; 4243 current_space->local_remaining -= bnzi; 4244 4245 bi[i+1] = bi[i] + bnzi; 4246 } 4247 4248 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4249 4250 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4251 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4252 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4253 4254 /* create symbolic parallel matrix B_mpi */ 4255 /*---------------------------------------*/ 4256 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4257 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4258 if (n==PETSC_DECIDE) { 4259 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4260 } else { 4261 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4262 } 4263 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4264 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4265 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4266 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4267 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4268 4269 /* B_mpi is not ready 
for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4270 B_mpi->assembled = PETSC_FALSE; 4271 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4272 merge->bi = bi; 4273 merge->bj = bj; 4274 merge->buf_ri = buf_ri; 4275 merge->buf_rj = buf_rj; 4276 merge->coi = NULL; 4277 merge->coj = NULL; 4278 merge->owners_co = NULL; 4279 4280 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4281 4282 /* attach the supporting struct to B_mpi for reuse */ 4283 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4284 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4285 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4286 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4287 *mpimat = B_mpi; 4288 4289 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4290 PetscFunctionReturn(0); 4291 } 4292 4293 #undef __FUNCT__ 4294 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4295 /*@C 4296 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4297 matrices from each processor 4298 4299 Collective on MPI_Comm 4300 4301 Input Parameters: 4302 + comm - the communicators the parallel matrix will live on 4303 . seqmat - the input sequential matrices 4304 . m - number of local rows (or PETSC_DECIDE) 4305 . n - number of local columns (or PETSC_DECIDE) 4306 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4307 4308 Output Parameter: 4309 . mpimat - the parallel matrix generated 4310 4311 Level: advanced 4312 4313 Notes: 4314 The dimensions of the sequential matrix in each processor MUST be the same. 4315 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4316 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4317 @*/ 4318 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4319 { 4320 PetscErrorCode ierr; 4321 PetscMPIInt size; 4322 4323 PetscFunctionBegin; 4324 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4325 if (size == 1) { 4326 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4327 if (scall == MAT_INITIAL_MATRIX) { 4328 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4329 } else { 4330 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4331 } 4332 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4333 PetscFunctionReturn(0); 4334 } 4335 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4336 if (scall == MAT_INITIAL_MATRIX) { 4337 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4338 } 4339 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4340 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4341 PetscFunctionReturn(0); 4342 } 4343 4344 #undef __FUNCT__ 4345 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4346 /*@ 4347 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4348 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4349 with MatGetSize() 4350 4351 Not Collective 4352 4353 Input Parameters: 4354 + A - the matrix 4355 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4356 4357 Output Parameter: 4358 . 
A_loc - the local sequential matrix generated 4359 4360 Level: developer 4361 4362 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4363 4364 @*/ 4365 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4366 { 4367 PetscErrorCode ierr; 4368 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4369 Mat_SeqAIJ *mat,*a,*b; 4370 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4371 MatScalar *aa,*ba,*cam; 4372 PetscScalar *ca; 4373 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4374 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4375 PetscBool match; 4376 MPI_Comm comm; 4377 PetscMPIInt size; 4378 4379 PetscFunctionBegin; 4380 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4381 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4382 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4383 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4384 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4385 4386 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4387 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4388 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4389 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4390 aa = a->a; ba = b->a; 4391 if (scall == MAT_INITIAL_MATRIX) { 4392 if (size == 1) { 4393 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4394 PetscFunctionReturn(0); 4395 } 4396 4397 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4398 ci[0] = 0; 4399 for (i=0; i<am; i++) { 4400 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4401 } 4402 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4403 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4404 k = 0; 4405 for (i=0; i<am; i++) { 4406 ncols_o = bi[i+1] - bi[i]; 4407 ncols_d = ai[i+1] - ai[i]; 4408 /* off-diagonal portion of A */ 4409 for (jo=0; jo<ncols_o; jo++) { 4410 col = cmap[*bj]; 4411 if (col >= cstart) break; 4412 cj[k] = col; bj++; 4413 ca[k++] = *ba++; 4414 } 4415 /* diagonal portion of A */ 4416 for (j=0; j<ncols_d; j++) { 4417 cj[k] = cstart + *aj++; 4418 ca[k++] = *aa++; 4419 } 4420 /* off-diagonal portion of A */ 4421 for (j=jo; j<ncols_o; j++) { 4422 cj[k] = cmap[*bj++]; 4423 ca[k++] = *ba++; 4424 } 4425 } 4426 /* put together the new matrix */ 4427 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4428 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4429 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4430 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4431 mat->free_a = PETSC_TRUE; 4432 mat->free_ij = PETSC_TRUE; 4433 mat->nonew = 0; 4434 } else if (scall == MAT_REUSE_MATRIX) { 4435 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4436 ci = mat->i; cj = mat->j; cam = mat->a; 4437 for (i=0; i<am; i++) { 4438 /* off-diagonal portion of A */ 4439 ncols_o = bi[i+1] - bi[i]; 4440 for (jo=0; jo<ncols_o; jo++) { 4441 col = cmap[*bj]; 4442 if (col >= cstart) break; 4443 *cam++ = *ba++; bj++; 4444 } 4445 /* diagonal portion of A */ 4446 ncols_d = ai[i+1] - ai[i]; 4447 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4448 /* off-diagonal portion of A */ 4449 for (j=jo; j<ncols_o; j++) { 4450 *cam++ = *ba++; bj++; 4451 } 4452 } 4453 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4454 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4455 PetscFunctionReturn(0); 4456 } 4457 4458 #undef __FUNCT__ 4459 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4460 /*@C 4461 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4462 4463 Not Collective 4464 4465 Input Parameters: 4466 + A - the matrix 4467 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4468 - row, col - index sets of rows and columns to extract (or NULL) 4469 4470 Output Parameter: 4471 . A_loc - the local sequential matrix generated 4472 4473 Level: developer 4474 4475 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4476 4477 @*/ 4478 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4479 { 4480 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4481 PetscErrorCode ierr; 4482 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4483 IS isrowa,iscola; 4484 Mat *aloc; 4485 PetscBool match; 4486 4487 PetscFunctionBegin; 4488 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4489 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4490 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4491 if (!row) { 4492 start = A->rmap->rstart; end = A->rmap->rend; 4493 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4494 } else { 4495 isrowa = *row; 4496 } 4497 if (!col) { 4498 start = A->cmap->rstart; 4499 cmap = a->garray; 4500 nzA = a->A->cmap->n; 4501 nzB = a->B->cmap->n; 4502 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4503 ncols = 0; 4504 for (i=0; i<nzB; i++) { 4505 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4506 else break; 4507 } 4508 imark = i; 4509 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4510 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4511 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4512 } else { 4513 iscola = *col; 4514 } 4515 if (scall != MAT_INITIAL_MATRIX) { 4516 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4517 aloc[0] = *A_loc; 4518 } 4519 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4520 *A_loc = aloc[0]; 4521 ierr = PetscFree(aloc);CHKERRQ(ierr); 4522 if (!row) { 4523 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4524 } 4525 if (!col) { 4526 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4527 } 4528 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4529 PetscFunctionReturn(0); 4530 } 4531 4532 #undef __FUNCT__ 4533 #define __FUNCT__ "MatGetBrowsOfAcols" 4534 /*@C 4535 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to 
nonzero columns of local A 4536 4537 Collective on Mat 4538 4539 Input Parameters: 4540 + A,B - the matrices in mpiaij format 4541 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4542 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4543 4544 Output Parameter: 4545 + rowb, colb - index sets of rows and columns of B to extract 4546 - B_seq - the sequential matrix generated 4547 4548 Level: developer 4549 4550 @*/ 4551 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4552 { 4553 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4554 PetscErrorCode ierr; 4555 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4556 IS isrowb,iscolb; 4557 Mat *bseq=NULL; 4558 4559 PetscFunctionBegin; 4560 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4561 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4562 } 4563 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4564 4565 if (scall == MAT_INITIAL_MATRIX) { 4566 start = A->cmap->rstart; 4567 cmap = a->garray; 4568 nzA = a->A->cmap->n; 4569 nzB = a->B->cmap->n; 4570 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4571 ncols = 0; 4572 for (i=0; i<nzB; i++) { /* row < local row index */ 4573 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4574 else break; 4575 } 4576 imark = i; 4577 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4578 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4579 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4580 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4581 } else { 4582 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4583 isrowb = *rowb; iscolb = *colb; 4584 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 4585 bseq[0] = *B_seq; 4586 } 4587 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4588 *B_seq = bseq[0]; 4589 ierr = PetscFree(bseq);CHKERRQ(ierr); 4590 if (!rowb) { 4591 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4592 } else { 4593 *rowb = isrowb; 4594 } 4595 if (!colb) { 4596 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4597 } else { 4598 *colb = iscolb; 4599 } 4600 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4601 PetscFunctionReturn(0); 4602 } 4603 4604 #undef __FUNCT__ 4605 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4606 /* 4607 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4608 of the OFF-DIAGONAL portion of local A 4609 4610 Collective on Mat 4611 4612 Input Parameters: 4613 + A,B - the matrices in mpiaij format 4614 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4615 4616 Output Parameter: 4617 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4618 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4619 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4620 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4621 4622 Level: developer 4623 4624 */ 4625 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4626 { 4627 VecScatter_MPI_General *gen_to,*gen_from; 4628 PetscErrorCode ierr; 4629 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4630 Mat_SeqAIJ *b_oth; 4631 VecScatter ctx =a->Mvctx; 4632 MPI_Comm comm; 4633 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4634 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4635 PetscScalar *rvalues,*svalues; 4636 MatScalar *b_otha,*bufa,*bufA; 4637 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4638 MPI_Request *rwaits = NULL,*swaits = NULL; 4639 MPI_Status *sstatus,rstatus; 4640 PetscMPIInt jj,size; 4641 PetscInt *cols,sbs,rbs; 4642 PetscScalar *vals; 4643 4644 PetscFunctionBegin; 4645 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4646 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4647 4648 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4649 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4650 } 4651 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4652 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4653 4654 gen_to = (VecScatter_MPI_General*)ctx->todata; 4655 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4656 rvalues = gen_from->values; /* holds the length of receiving row */ 4657 svalues = gen_to->values; /* holds the length of sending row */ 4658 nrecvs = gen_from->n; 4659 nsends = gen_to->n; 4660 4661 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4662 srow = gen_to->indices; /* local row index to be sent */ 4663 sstarts = gen_to->starts; 4664 sprocs = gen_to->procs; 4665 sstatus = gen_to->sstatus; 4666 sbs = gen_to->bs; 4667 rstarts = gen_from->starts; 4668 rprocs = gen_from->procs; 4669 rbs = gen_from->bs; 4670 4671 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4672 if (scall == MAT_INITIAL_MATRIX) { 4673 /* i-array */ 4674 /*---------*/ 4675 /* post receives */ 4676 for (i=0; i<nrecvs; i++) { 4677 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4678 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4679 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4680 } 4681 4682 /* pack the outgoing message */ 4683 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4684 4685 sstartsj[0] = 0; 4686 rstartsj[0] = 0; 4687 len = 0; /* total length of j or a array to be sent */ 4688 k = 0; 4689 for (i=0; i<nsends; i++) { 4690 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4691 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4692 for (j=0; j<nrows; j++) { 4693 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4694 for (l=0; l<sbs; l++) { 4695 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4696 4697 rowlen[j*sbs+l] = ncols; 4698 4699 len += ncols; 4700 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4701 } 4702 k++; 4703 } 4704 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4705 4706 sstartsj[i+1] = len; /* starting point of 
           (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr); /* the caller did not ask to keep the buffers, so free the value buffer here */
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

   Not Collective

   Input Parameter:
.  A - The matrix in mpiaij format

   Output Parameters:
+  lvec - The local vector holding off-process values from the argument to a matrix-vector product
.
colmap - A map from global column index to local index into lvec 4851 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4852 4853 Level: developer 4854 4855 @*/ 4856 #if defined(PETSC_USE_CTABLE) 4857 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4858 #else 4859 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4860 #endif 4861 { 4862 Mat_MPIAIJ *a; 4863 4864 PetscFunctionBegin; 4865 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4866 PetscValidPointer(lvec, 2); 4867 PetscValidPointer(colmap, 3); 4868 PetscValidPointer(multScatter, 4); 4869 a = (Mat_MPIAIJ*) A->data; 4870 if (lvec) *lvec = a->lvec; 4871 if (colmap) *colmap = a->colmap; 4872 if (multScatter) *multScatter = a->Mvctx; 4873 PetscFunctionReturn(0); 4874 } 4875 4876 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4877 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4878 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4879 #if defined(PETSC_HAVE_ELEMENTAL) 4880 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4881 #endif 4882 #if defined(PETSC_HAVE_HYPRE) 4883 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 4884 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 4885 #endif 4886 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*); 4887 4888 #undef __FUNCT__ 4889 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4890 /* 4891 Computes (B'*A')' since computing B*A directly is untenable 4892 4893 n p p 4894 ( ) ( ) ( ) 4895 m ( A ) * n ( B ) = m ( C ) 4896 ( ) ( ) ( ) 4897 4898 */ 4899 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4900 { 4901 PetscErrorCode ierr; 4902 Mat At,Bt,Ct; 4903 4904 PetscFunctionBegin; 4905 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4906 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4907 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4908 ierr = MatDestroy(&At);CHKERRQ(ierr); 4909 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4910 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4911 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4912 PetscFunctionReturn(0); 4913 } 4914 4915 #undef __FUNCT__ 4916 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4917 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4918 { 4919 PetscErrorCode ierr; 4920 PetscInt m=A->rmap->n,n=B->cmap->n; 4921 Mat Cmat; 4922 4923 PetscFunctionBegin; 4924 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4925 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 4926 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4927 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 4928 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4929 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 4930 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4931 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4932 4933 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 4934 4935 *C = Cmat; 4936 PetscFunctionReturn(0); 4937 } 4938 4939 /* 
----------------------------------------------------------------*/ 4940 #undef __FUNCT__ 4941 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4942 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4943 { 4944 PetscErrorCode ierr; 4945 4946 PetscFunctionBegin; 4947 if (scall == MAT_INITIAL_MATRIX) { 4948 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4949 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4950 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4951 } 4952 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4953 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4954 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4955 PetscFunctionReturn(0); 4956 } 4957 4958 /*MC 4959 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4960 4961 Options Database Keys: 4962 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4963 4964 Level: beginner 4965 4966 .seealso: MatCreateAIJ() 4967 M*/ 4968 4969 #undef __FUNCT__ 4970 #define __FUNCT__ "MatCreate_MPIAIJ" 4971 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 4972 { 4973 Mat_MPIAIJ *b; 4974 PetscErrorCode ierr; 4975 PetscMPIInt size; 4976 4977 PetscFunctionBegin; 4978 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 4979 4980 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 4981 B->data = (void*)b; 4982 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4983 B->assembled = PETSC_FALSE; 4984 B->insertmode = NOT_SET_VALUES; 4985 b->size = size; 4986 4987 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 4988 4989 /* build cache for off array entries formed */ 4990 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 4991 4992 b->donotstash = PETSC_FALSE; 4993 b->colmap = 0; 4994 b->garray = 0; 4995 b->roworiented = PETSC_TRUE; 4996 4997 /* stuff used for matrix vector multiply */ 4998 b->lvec = NULL; 4999 b->Mvctx = NULL; 5000 5001 /* stuff for MatGetRow() */ 5002 b->rowindices = 0; 5003 b->rowvalues = 0; 5004 b->getrowactive = PETSC_FALSE; 5005 5006 /* flexible pointer used in CUSP/CUSPARSE classes */ 5007 b->spptr = NULL; 5008 5009 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5010 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5011 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5012 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5013 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5014 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5015 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5016 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5017 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5018 ierr = 
  PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@C
   MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
   and "off-diagonal" parts of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices for "diagonal" portion of matrix, in local numbering (0 to n-1)
.  a - matrix values for "diagonal" portion of matrix
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices for "off-diagonal" portion of matrix, in global numbering
-  oa - matrix values for "off-diagonal" portion of matrix

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
   The i, j, and a arrays (and likewise oi, oj, and oa) ARE NOT copied by this routine into the internal format
   used by PETSc. The user must free the arrays once the matrix has been destroyed and not before.

   The i, j, oi, and oj indices are 0 based.

   See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.

   This sets local rows and cannot be used to set off-processor values.

   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
   keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
   communication if it is known that only local entries will be set.
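   Example usage:
   The following is an illustrative sketch (it is not taken from a PETSc example, and the array contents are
   assumptions made purely for the illustration): the standard 4x4 tridiagonal matrix distributed over two
   processes, each owning two rows and two columns. Note that the "diagonal" column indices are local while the
   "off-diagonal" column indices are global.
.vb
      PetscInt    di[] = {0,2,4}, dj[] = {0,1,0,1};
      PetscScalar da[] = {2.0,-1.0,-1.0,2.0};
      PetscInt    oi[] = {0,0,1}, oj[] = {2};
      PetscScalar oa[] = {-1.0};
      Mat         A;

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
   The arrays above are those of process 0 (which owns global rows 0-1 and columns 0-1); process 1 uses the same
   di, dj, and da but oi[] = {0,1,1} and oj[] = {1}. The arrays must remain valid for the lifetime of A and may
   be freed by the user only after MatDestroy().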
5080 5081 .keywords: matrix, aij, compressed row, sparse, parallel 5082 5083 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5084 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5085 @*/ 5086 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5087 { 5088 PetscErrorCode ierr; 5089 Mat_MPIAIJ *maij; 5090 5091 PetscFunctionBegin; 5092 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5093 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5094 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5095 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5096 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5097 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5098 maij = (Mat_MPIAIJ*) (*mat)->data; 5099 5100 (*mat)->preallocated = PETSC_TRUE; 5101 5102 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5103 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5104 5105 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5106 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5107 5108 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5109 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5110 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5111 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5112 5113 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5114 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5115 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5116 PetscFunctionReturn(0); 5117 } 5118 5119 /* 5120 Special version for direct calls from Fortran 5121 */ 5122 #include <petsc/private/fortranimpl.h> 5123 5124 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5125 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5126 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5127 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5128 #endif 5129 5130 /* Change these macros so can be used in void function */ 5131 #undef CHKERRQ 5132 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5133 #undef SETERRQ2 5134 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5135 #undef SETERRQ3 5136 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5137 #undef SETERRQ 5138 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5139 5140 #undef __FUNCT__ 5141 #define __FUNCT__ "matsetvaluesmpiaij_" 5142 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5143 { 5144 Mat mat = *mmat; 5145 PetscInt m = *mm, n = *mn; 5146 InsertMode addv = *maddv; 5147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5148 PetscScalar value; 5149 PetscErrorCode ierr; 5150 5151 MatCheckPreallocated(mat,1); 5152 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5153 5154 #if defined(PETSC_USE_DEBUG) 5155 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5156 #endif 5157 { 5158 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5159 
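    /* rstart/rend (and cstart/cend declared just below) bound the locally owned rows and columns: entries in
       locally owned rows are inserted directly into the diagonal block aij->A or the off-diagonal block aij->B,
       while entries destined for rows owned by other processes are stashed and communicated at assembly time */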
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5160 PetscBool roworiented = aij->roworiented; 5161 5162 /* Some Variables required in the macro */ 5163 Mat A = aij->A; 5164 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5165 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5166 MatScalar *aa = a->a; 5167 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5168 Mat B = aij->B; 5169 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5170 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5171 MatScalar *ba = b->a; 5172 5173 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5174 PetscInt nonew = a->nonew; 5175 MatScalar *ap1,*ap2; 5176 5177 PetscFunctionBegin; 5178 for (i=0; i<m; i++) { 5179 if (im[i] < 0) continue; 5180 #if defined(PETSC_USE_DEBUG) 5181 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5182 #endif 5183 if (im[i] >= rstart && im[i] < rend) { 5184 row = im[i] - rstart; 5185 lastcol1 = -1; 5186 rp1 = aj + ai[row]; 5187 ap1 = aa + ai[row]; 5188 rmax1 = aimax[row]; 5189 nrow1 = ailen[row]; 5190 low1 = 0; 5191 high1 = nrow1; 5192 lastcol2 = -1; 5193 rp2 = bj + bi[row]; 5194 ap2 = ba + bi[row]; 5195 rmax2 = bimax[row]; 5196 nrow2 = bilen[row]; 5197 low2 = 0; 5198 high2 = nrow2; 5199 5200 for (j=0; j<n; j++) { 5201 if (roworiented) value = v[i*n+j]; 5202 else value = v[i+j*m]; 5203 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5204 if (in[j] >= cstart && in[j] < cend) { 5205 col = in[j] - cstart; 5206 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5207 } else if (in[j] < 0) continue; 5208 #if defined(PETSC_USE_DEBUG) 5209 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5210 #endif 5211 else { 5212 if (mat->was_assembled) { 5213 if (!aij->colmap) { 5214 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5215 } 5216 #if defined(PETSC_USE_CTABLE) 5217 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5218 col--; 5219 #else 5220 col = aij->colmap[in[j]] - 1; 5221 #endif 5222 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5223 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5224 col = in[j]; 5225 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5226 B = aij->B; 5227 b = (Mat_SeqAIJ*)B->data; 5228 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5229 rp2 = bj + bi[row]; 5230 ap2 = ba + bi[row]; 5231 rmax2 = bimax[row]; 5232 nrow2 = bilen[row]; 5233 low2 = 0; 5234 high2 = nrow2; 5235 bm = aij->B->rmap->n; 5236 ba = b->a; 5237 } 5238 } else col = in[j]; 5239 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5240 } 5241 } 5242 } else if (!aij->donotstash) { 5243 if (roworiented) { 5244 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5245 } else { 5246 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5247 } 5248 } 5249 } 5250 } 5251 PetscFunctionReturnVoid(); 5252 } 5253 5254
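/*
   Illustrative (hypothetical) caller-side sketch of the MAT_INITIAL_MATRIX / MAT_REUSE_MATRIX protocol of
   MatGetBrowsOfAoCols_MPIAIJ() documented above: the index arrays and the value buffer produced by the first
   call are kept by the caller and handed back, so that a later call with an unchanged nonzero pattern only
   moves numerical values.  The variable names and the surrounding context (A, B, ierr) are assumptions made
   for the example; this is not code taken from an existing PETSc caller.

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa      = NULL;
     Mat        B_oth     = NULL;

     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     (... the numerical values of B change while its nonzero pattern stays fixed ...)
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);

     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
*/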