1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/isimpl.h> 6 #include <petscblaslapack.h> 7 #include <petscsf.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 48 { 49 PetscErrorCode ierr; 50 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 51 52 PetscFunctionBegin; 53 if (mat->A) { 54 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 55 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 56 } 57 PetscFunctionReturn(0); 58 } 59 60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 61 { 62 PetscErrorCode ierr; 63 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 64 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 65 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 66 const PetscInt *ia,*ib; 67 const MatScalar *aa,*bb; 68 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 69 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 70 71 PetscFunctionBegin; 72 *keptrows = 0; 73 ia = a->i; 74 ib = b->i; 75 for (i=0; i<m; i++) { 76 na = ia[i+1] - ia[i]; 77 nb = ib[i+1] - ib[i]; 78 if (!na && !nb) { 79 cnt++; 80 goto ok1; 81 } 82 aa = a->a + ia[i]; 83 for (j=0; j<na; j++) { 84 if (aa[j] != 0.0) goto ok1; 85 } 86 bb = b->a + ib[i]; 87 for (j=0; j <nb; j++) { 88 if (bb[j] != 0.0) goto ok1; 89 } 90 cnt++; 91 ok1:; 92 } 93 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 94 if (!n0rows) PetscFunctionReturn(0); 95 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 96 cnt = 0; 97 for (i=0; i<m; i++) { 98 na = ia[i+1] - ia[i]; 99 nb = ib[i+1] - ib[i]; 100 if (!na && !nb) continue; 101 aa = a->a + ia[i]; 102 for (j=0; j<na;j++) { 103 if (aa[j] != 0.0) { 104 rows[cnt++] = rstart + i; 105 goto ok2; 106 } 107 } 108 bb = b->a + ib[i]; 109 for (j=0; j<nb; j++) { 110 if (bb[j] != 0.0) { 111 rows[cnt++] = rstart + i; 112 goto ok2; 113 } 114 } 115 ok2:; 116 } 117 ierr = 
ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 122 { 123 PetscErrorCode ierr; 124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 125 PetscBool cong; 126 127 PetscFunctionBegin; 128 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 129 if (Y->assembled && cong) { 130 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 131 } else { 132 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 133 } 134 PetscFunctionReturn(0); 135 } 136 137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 138 { 139 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 140 PetscErrorCode ierr; 141 PetscInt i,rstart,nrows,*rows; 142 143 PetscFunctionBegin; 144 *zrows = NULL; 145 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 146 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 147 for (i=0; i<nrows; i++) rows[i] += rstart; 148 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 153 { 154 PetscErrorCode ierr; 155 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 156 PetscInt i,n,*garray = aij->garray; 157 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 158 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 159 PetscReal *work; 160 161 PetscFunctionBegin; 162 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 163 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 164 if (type == NORM_2) { 165 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 166 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 167 } 168 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 169 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 170 } 171 } else if (type == NORM_1) { 172 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 173 work[A->cmap->rstart 
+ a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 174 } 175 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 176 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 177 } 178 } else if (type == NORM_INFINITY) { 179 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 180 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 181 } 182 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 183 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 184 } 185 186 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 187 if (type == NORM_INFINITY) { 188 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 189 } else { 190 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 191 } 192 ierr = PetscFree(work);CHKERRQ(ierr); 193 if (type == NORM_2) { 194 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 195 } 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 200 { 201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 202 IS sis,gis; 203 PetscErrorCode ierr; 204 const PetscInt *isis,*igis; 205 PetscInt n,*iis,nsis,ngis,rstart,i; 206 207 PetscFunctionBegin; 208 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 209 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 210 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 211 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 212 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 213 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 214 215 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 216 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 217 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 218 n = ngis + nsis; 219 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 220 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 221 
for (i=0; i<n; i++) iis[i] += rstart; 222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 223 224 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 225 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 226 ierr = ISDestroy(&sis);CHKERRQ(ierr); 227 ierr = ISDestroy(&gis);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 /* 232 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 233 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 234 235 Only for square matrices 236 237 Used by a preconditioner, hence PETSC_EXTERN 238 */ 239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 240 { 241 PetscMPIInt rank,size; 242 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 243 PetscErrorCode ierr; 244 Mat mat; 245 Mat_SeqAIJ *gmata; 246 PetscMPIInt tag; 247 MPI_Status status; 248 PetscBool aij; 249 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 254 if (!rank) { 255 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 256 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 257 } 258 if (reuse == MAT_INITIAL_MATRIX) { 259 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 260 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 261 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 262 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 263 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 264 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 265 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 266 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 267 ierr = 
MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 268 269 rowners[0] = 0; 270 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 271 rstart = rowners[rank]; 272 rend = rowners[rank+1]; 273 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 274 if (!rank) { 275 gmata = (Mat_SeqAIJ*) gmat->data; 276 /* send row lengths to all processors */ 277 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 278 for (i=1; i<size; i++) { 279 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 280 } 281 /* determine number diagonal and off-diagonal counts */ 282 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 283 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 284 jj = 0; 285 for (i=0; i<m; i++) { 286 for (j=0; j<dlens[i]; j++) { 287 if (gmata->j[jj] < rstart) ld[i]++; 288 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 289 jj++; 290 } 291 } 292 /* send column indices to other processes */ 293 for (i=1; i<size; i++) { 294 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 295 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 296 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 } 298 299 /* send numerical values to other processes */ 300 for (i=1; i<size; i++) { 301 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 302 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 303 } 304 gmataa = gmata->a; 305 gmataj = gmata->j; 306 307 } else { 308 /* receive row lengths */ 309 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* receive column indices */ 311 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 312 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 313 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 314 /* determine number diagonal and off-diagonal counts */ 315 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 316 
ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 317 jj = 0; 318 for (i=0; i<m; i++) { 319 for (j=0; j<dlens[i]; j++) { 320 if (gmataj[jj] < rstart) ld[i]++; 321 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 322 jj++; 323 } 324 } 325 /* receive numerical values */ 326 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 327 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 328 } 329 /* set preallocation */ 330 for (i=0; i<m; i++) { 331 dlens[i] -= olens[i]; 332 } 333 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 334 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 335 336 for (i=0; i<m; i++) { 337 dlens[i] += olens[i]; 338 } 339 cnt = 0; 340 for (i=0; i<m; i++) { 341 row = rstart + i; 342 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 343 cnt += dlens[i]; 344 } 345 if (rank) { 346 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 347 } 348 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 349 ierr = PetscFree(rowners);CHKERRQ(ierr); 350 351 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 352 353 *inmat = mat; 354 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 355 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 356 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 357 mat = *inmat; 358 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 359 if (!rank) { 360 /* send numerical values to other processes */ 361 gmata = (Mat_SeqAIJ*) gmat->data; 362 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 363 gmataa = gmata->a; 364 for (i=1; i<size; i++) { 365 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 366 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 367 } 368 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 369 } else { 370 /* receive numerical values from process 0*/ 371 nz = Ad->nz + 
Ao->nz; 372 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 373 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 374 } 375 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 376 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 377 ad = Ad->a; 378 ao = Ao->a; 379 if (mat->rmap->n) { 380 i = 0; 381 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 for (i=1; i<mat->rmap->n; i++) { 385 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 386 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 387 } 388 i--; 389 if (mat->rmap->n) { 390 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 391 } 392 if (rank) { 393 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 394 } 395 } 396 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 398 PetscFunctionReturn(0); 399 } 400 401 /* 402 Local utility routine that creates a mapping from the global column 403 number to the local number in the off-diagonal part of the local 404 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 405 a slightly higher hash table cost; without it it is not scalable (each processor 406 has an order N integer array but is fast to acess. 
407 */ 408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt n = aij->B->cmap->n,i; 413 414 PetscFunctionBegin; 415 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 416 #if defined(PETSC_USE_CTABLE) 417 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 418 for (i=0; i<n; i++) { 419 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 420 } 421 #else 422 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 423 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 424 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 425 #endif 426 PetscFunctionReturn(0); 427 } 428 429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 430 { \ 431 if (col <= lastcol1) low1 = 0; \ 432 else high1 = nrow1; \ 433 lastcol1 = col;\ 434 while (high1-low1 > 5) { \ 435 t = (low1+high1)/2; \ 436 if (rp1[t] > col) high1 = t; \ 437 else low1 = t; \ 438 } \ 439 for (_i=low1; _i<high1; _i++) { \ 440 if (rp1[_i] > col) break; \ 441 if (rp1[_i] == col) { \ 442 if (addv == ADD_VALUES) ap1[_i] += value; \ 443 else ap1[_i] = value; \ 444 goto a_noinsert; \ 445 } \ 446 } \ 447 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 448 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 449 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 450 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 451 N = nrow1++ - 1; a->nz++; high1++; \ 452 /* shift up all the later entries in this row */ \ 453 for (ii=N; ii>=_i; ii--) { \ 454 rp1[ii+1] = rp1[ii]; \ 455 ap1[ii+1] = ap1[ii]; \ 456 } \ 457 rp1[_i] = col; \ 458 ap1[_i] = value; \ 459 
A->nonzerostate++;\ 460 a_noinsert: ; \ 461 ailen[row] = nrow1; \ 462 } 463 464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 465 { \ 466 if (col <= lastcol2) low2 = 0; \ 467 else high2 = nrow2; \ 468 lastcol2 = col; \ 469 while (high2-low2 > 5) { \ 470 t = (low2+high2)/2; \ 471 if (rp2[t] > col) high2 = t; \ 472 else low2 = t; \ 473 } \ 474 for (_i=low2; _i<high2; _i++) { \ 475 if (rp2[_i] > col) break; \ 476 if (rp2[_i] == col) { \ 477 if (addv == ADD_VALUES) ap2[_i] += value; \ 478 else ap2[_i] = value; \ 479 goto b_noinsert; \ 480 } \ 481 } \ 482 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 484 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 485 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 486 N = nrow2++ - 1; b->nz++; high2++; \ 487 /* shift up all the later entries in this row */ \ 488 for (ii=N; ii>=_i; ii--) { \ 489 rp2[ii+1] = rp2[ii]; \ 490 ap2[ii+1] = ap2[ii]; \ 491 } \ 492 rp2[_i] = col; \ 493 ap2[_i] = value; \ 494 B->nonzerostate++; \ 495 b_noinsert: ; \ 496 bilen[row] = nrow2; \ 497 } 498 499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 500 { 501 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 502 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 503 PetscErrorCode ierr; 504 PetscInt l,*garray = mat->garray,diag; 505 506 PetscFunctionBegin; 507 /* code only works for square matrices A */ 508 509 /* find size of row to the left of the diagonal part */ 510 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 511 row = row - diag; 512 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 513 if (garray[b->j[b->i[row]+l]] > diag) break; 514 } 515 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* diagonal 
part */ 518 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 519 520 /* right of diagonal part */ 521 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 526 { 527 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 528 PetscScalar value; 529 PetscErrorCode ierr; 530 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 531 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 532 PetscBool roworiented = aij->roworiented; 533 534 /* Some Variables required in the macro */ 535 Mat A = aij->A; 536 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 537 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 538 MatScalar *aa = a->a; 539 PetscBool ignorezeroentries = a->ignorezeroentries; 540 Mat B = aij->B; 541 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 542 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 543 MatScalar *ba = b->a; 544 545 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 546 PetscInt nonew; 547 MatScalar *ap1,*ap2; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (im[i] < 0) continue; 552 #if defined(PETSC_USE_DEBUG) 553 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 554 #endif 555 if (im[i] >= rstart && im[i] < rend) { 556 row = im[i] - rstart; 557 lastcol1 = -1; 558 rp1 = aj + ai[row]; 559 ap1 = aa + ai[row]; 560 rmax1 = aimax[row]; 561 nrow1 = ailen[row]; 562 low1 = 0; 563 high1 = nrow1; 564 lastcol2 = -1; 565 rp2 = bj + bi[row]; 566 ap2 = ba + bi[row]; 567 rmax2 = bimax[row]; 568 nrow2 = bilen[row]; 569 low2 = 0; 570 high2 = nrow2; 571 572 for 
(j=0; j<n; j++) { 573 if (roworiented) value = v[i*n+j]; 574 else value = v[i+j*m]; 575 if (in[j] >= cstart && in[j] < cend) { 576 col = in[j] - cstart; 577 nonew = a->nonew; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 579 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 580 } else if (in[j] < 0) continue; 581 #if defined(PETSC_USE_DEBUG) 582 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 583 #endif 584 else { 585 if (mat->was_assembled) { 586 if (!aij->colmap) { 587 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 #if defined(PETSC_USE_CTABLE) 590 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 591 col--; 592 #else 593 col = aij->colmap[in[j]] - 1; 594 #endif 595 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 596 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 597 col = in[j]; 598 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 599 B = aij->B; 600 b = (Mat_SeqAIJ*)B->data; 601 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 602 rp2 = bj + bi[row]; 603 ap2 = ba + bi[row]; 604 rmax2 = bimax[row]; 605 nrow2 = bilen[row]; 606 low2 = 0; 607 high2 = nrow2; 608 bm = aij->B->rmap->n; 609 ba = b->a; 610 } else if (col < 0) { 611 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 612 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 613 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } 615 } else col = in[j]; 616 nonew = b->nonew; 617 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 618 } 619 } 620 } else { 621 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row 
%D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 622 if (!aij->donotstash) { 623 mat->assembled = PETSC_FALSE; 624 if (roworiented) { 625 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 626 } else { 627 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 628 } 629 } 630 } 631 } 632 PetscFunctionReturn(0); 633 } 634 635 /* 636 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 637 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 638 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 639 */ 640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 641 { 642 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 643 Mat A = aij->A; /* diagonal part of the matrix */ 644 Mat B = aij->B; /* offdiagonal part of the matrix */ 645 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 646 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 647 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 648 PetscInt *ailen = a->ilen,*aj = a->j; 649 PetscInt *bilen = b->ilen,*bj = b->j; 650 PetscInt am = aij->A->rmap->n,j; 651 PetscInt diag_so_far = 0,dnz; 652 PetscInt offd_so_far = 0,onz; 653 654 PetscFunctionBegin; 655 /* Iterate over all rows of the matrix */ 656 for (j=0; j<am; j++) { 657 dnz = onz = 0; 658 /* Iterate over all non-zero columns of the current row */ 659 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 660 /* If column is in the diagonal */ 661 if (mat_j[col] >= cstart && mat_j[col] < cend) { 662 aj[diag_so_far++] = mat_j[col] - cstart; 663 dnz++; 664 } else { /* off-diagonal entries */ 665 bj[offd_so_far++] = mat_j[col]; 666 onz++; 667 } 668 } 669 ailen[j] = dnz; 670 bilen[j] = onz; 671 } 
672 PetscFunctionReturn(0); 673 } 674 675 /* 676 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 677 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 678 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 679 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 680 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 681 */ 682 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 683 { 684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 685 Mat A = aij->A; /* diagonal part of the matrix */ 686 Mat B = aij->B; /* offdiagonal part of the matrix */ 687 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 688 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 689 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 690 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 691 PetscInt *ailen = a->ilen,*aj = a->j; 692 PetscInt *bilen = b->ilen,*bj = b->j; 693 PetscInt am = aij->A->rmap->n,j; 694 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 695 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 696 PetscScalar *aa = a->a,*ba = b->a; 697 698 PetscFunctionBegin; 699 /* Iterate over all rows of the matrix */ 700 for (j=0; j<am; j++) { 701 dnz_row = onz_row = 0; 702 rowstart_offd = full_offd_i[j]; 703 rowstart_diag = full_diag_i[j]; 704 /* Iterate over all non-zero columns of the current row */ 705 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 706 /* If column is in the diagonal */ 707 if (mat_j[col] >= cstart && mat_j[col] < cend) { 708 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 709 aa[rowstart_diag+dnz_row] = mat_a[col]; 710 dnz_row++; 711 } else { /* off-diagonal entries */ 712 bj[rowstart_offd+onz_row] = mat_j[col]; 713 ba[rowstart_offd+onz_row] = mat_a[col]; 714 onz_row++; 715 } 716 } 717 ailen[j] = dnz_row; 718 bilen[j] = onz_row; 719 } 720 PetscFunctionReturn(0); 721 } 722 723 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 724 { 725 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 726 PetscErrorCode ierr; 727 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 728 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 729 730 PetscFunctionBegin; 731 for (i=0; i<m; i++) { 732 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 733 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 734 if (idxm[i] >= rstart && idxm[i] < rend) { 735 row = idxm[i] - rstart; 736 for (j=0; j<n; j++) { 737 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 738 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 739 if (idxn[j] >= cstart && idxn[j] < cend) { 740 col = idxn[j] - cstart; 741 ierr = 
MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 742 } else { 743 if (!aij->colmap) { 744 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 745 } 746 #if defined(PETSC_USE_CTABLE) 747 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 748 col--; 749 #else 750 col = aij->colmap[idxn[j]] - 1; 751 #endif 752 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 753 else { 754 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 755 } 756 } 757 } 758 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 759 } 760 PetscFunctionReturn(0); 761 } 762 763 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 764 765 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 766 { 767 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 768 PetscErrorCode ierr; 769 PetscInt nstash,reallocs; 770 771 PetscFunctionBegin; 772 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 773 774 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 775 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 776 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 777 PetscFunctionReturn(0); 778 } 779 780 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 781 { 782 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 783 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 784 PetscErrorCode ierr; 785 PetscMPIInt n; 786 PetscInt i,j,rstart,ncols,flg; 787 PetscInt *row,*col; 788 PetscBool other_disassembled; 789 PetscScalar *val; 790 791 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 792 793 PetscFunctionBegin; 794 if (!aij->donotstash && !mat->nooffprocentries) { 795 while (1) { 796 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 797 if (!flg) break; 798 799 for (i=0; i<n; ) { 800 /* Now identify the 
consecutive vals belonging to the same row */ 801 for (j=i,rstart=row[j]; j<n; j++) { 802 if (row[j] != rstart) break; 803 } 804 if (j < n) ncols = j-i; 805 else ncols = n-i; 806 /* Now assemble all these values with a single function call */ 807 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 808 809 i = j; 810 } 811 } 812 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 813 } 814 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 815 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 816 817 /* determine if any processor has disassembled, if so we must 818 also disassemble ourselfs, in order that we may reassemble. */ 819 /* 820 if nonzero structure of submatrix B cannot change then we know that 821 no processor disassembled thus we can skip this stuff 822 */ 823 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 824 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 825 if (mat->was_assembled && !other_disassembled) { 826 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 827 } 828 } 829 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 830 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 831 } 832 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 833 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 834 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 835 836 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 837 838 aij->rowvalues = 0; 839 840 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 841 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 842 843 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 844 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 845 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 846 ierr = 
MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 847 } 848 PetscFunctionReturn(0); 849 } 850 851 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 852 { 853 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 854 PetscErrorCode ierr; 855 856 PetscFunctionBegin; 857 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 858 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 863 { 864 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 865 PetscInt *lrows; 866 PetscInt r, len; 867 PetscBool cong; 868 PetscErrorCode ierr; 869 870 PetscFunctionBegin; 871 /* get locally owned rows */ 872 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 873 /* fix right hand side if needed */ 874 if (x && b) { 875 const PetscScalar *xx; 876 PetscScalar *bb; 877 878 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 879 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 880 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 881 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 882 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 883 } 884 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 885 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 886 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 887 if ((diag != 0.0) && cong) { 888 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 889 } else if (diag != 0.0) { 890 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 891 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 892 for (r = 0; r < len; ++r) { 893 const PetscInt row = lrows[r] + A->rmap->rstart; 894 ierr = 
MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 895 } 896 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 897 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 898 } else { 899 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 900 } 901 ierr = PetscFree(lrows);CHKERRQ(ierr); 902 903 /* only change matrix nonzero state if pattern was allowed to be changed */ 904 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 905 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 906 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 907 } 908 PetscFunctionReturn(0); 909 } 910 911 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 912 { 913 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 914 PetscErrorCode ierr; 915 PetscMPIInt n = A->rmap->n; 916 PetscInt i,j,r,m,p = 0,len = 0; 917 PetscInt *lrows,*owners = A->rmap->range; 918 PetscSFNode *rrows; 919 PetscSF sf; 920 const PetscScalar *xx; 921 PetscScalar *bb,*mask; 922 Vec xmask,lmask; 923 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 924 const PetscInt *aj, *ii,*ridx; 925 PetscScalar *aa; 926 927 PetscFunctionBegin; 928 /* Create SF where leaves are input rows and roots are owned rows */ 929 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 930 for (r = 0; r < n; ++r) lrows[r] = -1; 931 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 932 for (r = 0; r < N; ++r) { 933 const PetscInt idx = rows[r]; 934 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 935 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 936 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 937 } 938 rrows[r].rank = p; 939 rrows[r].index = rows[r] - owners[p]; 940 } 941 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), 
&sf);CHKERRQ(ierr); 942 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 943 /* Collect flags for rows to be zeroed */ 944 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 945 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 946 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 947 /* Compress and put in row numbers */ 948 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 949 /* zero diagonal part of matrix */ 950 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 951 /* handle off diagonal part of matrix */ 952 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 953 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 954 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 955 for (i=0; i<len; i++) bb[lrows[i]] = 1; 956 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 957 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 958 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 960 if (x) { 961 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 962 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 964 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 965 } 966 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 967 /* remove zeroed rows of off diagonal matrix */ 968 ii = aij->i; 969 for (i=0; i<len; i++) { 970 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 971 } 972 /* loop over all elements of off process part of matrix zeroing removed columns*/ 973 if (aij->compressedrow.use) { 974 m = aij->compressedrow.nrows; 975 ii = aij->compressedrow.i; 976 ridx = aij->compressedrow.rindex; 977 for (i=0; i<m; i++) { 978 n = ii[i+1] - ii[i]; 979 aj = 
aij->j + ii[i]; 980 aa = aij->a + ii[i]; 981 982 for (j=0; j<n; j++) { 983 if (PetscAbsScalar(mask[*aj])) { 984 if (b) bb[*ridx] -= *aa*xx[*aj]; 985 *aa = 0.0; 986 } 987 aa++; 988 aj++; 989 } 990 ridx++; 991 } 992 } else { /* do not use compressed row format */ 993 m = l->B->rmap->n; 994 for (i=0; i<m; i++) { 995 n = ii[i+1] - ii[i]; 996 aj = aij->j + ii[i]; 997 aa = aij->a + ii[i]; 998 for (j=0; j<n; j++) { 999 if (PetscAbsScalar(mask[*aj])) { 1000 if (b) bb[i] -= *aa*xx[*aj]; 1001 *aa = 0.0; 1002 } 1003 aa++; 1004 aj++; 1005 } 1006 } 1007 } 1008 if (x) { 1009 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1010 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1011 } 1012 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1013 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1014 ierr = PetscFree(lrows);CHKERRQ(ierr); 1015 1016 /* only change matrix nonzero state if pattern was allowed to be changed */ 1017 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1018 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1019 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1020 } 1021 PetscFunctionReturn(0); 1022 } 1023 1024 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1025 { 1026 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1027 PetscErrorCode ierr; 1028 PetscInt nt; 1029 VecScatter Mvctx = a->Mvctx; 1030 1031 PetscFunctionBegin; 1032 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1033 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1034 1035 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1037 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1038 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1039 PetscFunctionReturn(0); 1040 } 1041 1042 PetscErrorCode 
MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1043 { 1044 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1045 PetscErrorCode ierr; 1046 1047 PetscFunctionBegin; 1048 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1055 PetscErrorCode ierr; 1056 VecScatter Mvctx = a->Mvctx; 1057 1058 PetscFunctionBegin; 1059 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1060 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1062 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1063 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1068 { 1069 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1070 PetscErrorCode ierr; 1071 PetscBool merged; 1072 1073 PetscFunctionBegin; 1074 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1075 /* do nondiagonal part */ 1076 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1077 if (!merged) { 1078 /* send it on its way */ 1079 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1080 /* do local part */ 1081 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1082 /* receive remote parts: note this assumes the values are not actually */ 1083 /* added in yy until the next line, */ 1084 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1085 } else { 1086 /* do local part */ 1087 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1088 /* send it on its way */ 1089 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1090 /* values actually were received in the Begin() but we need to call this nop */ 1091 ierr = 
VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1092 } 1093 PetscFunctionReturn(0); 1094 } 1095 1096 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1097 { 1098 MPI_Comm comm; 1099 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1100 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1101 IS Me,Notme; 1102 PetscErrorCode ierr; 1103 PetscInt M,N,first,last,*notme,i; 1104 PetscBool lf; 1105 PetscMPIInt size; 1106 1107 PetscFunctionBegin; 1108 /* Easy test: symmetric diagonal block */ 1109 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1110 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1111 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1112 if (!*f) PetscFunctionReturn(0); 1113 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1114 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1115 if (size == 1) PetscFunctionReturn(0); 1116 1117 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1118 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1119 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1120 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1121 for (i=0; i<first; i++) notme[i] = i; 1122 for (i=last; i<M; i++) notme[i-last+first] = i; 1123 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1124 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1125 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1126 Aoff = Aoffs[0]; 1127 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1128 Boff = Boffs[0]; 1129 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1130 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1131 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1132 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1133 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1134 ierr = PetscFree(notme);CHKERRQ(ierr); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1139 { 1140 PetscErrorCode ierr; 1141 1142 PetscFunctionBegin; 1143 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 1152 PetscFunctionBegin; 1153 /* do nondiagonal part */ 1154 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1155 /* send it on its way */ 1156 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1157 /* do local part */ 1158 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1159 /* receive remote parts */ 1160 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 /* 1165 This only works correctly for square matrices where 
the subblock A->A is the 1166 diagonal block 1167 */ 1168 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1169 { 1170 PetscErrorCode ierr; 1171 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1172 1173 PetscFunctionBegin; 1174 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1175 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1176 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1177 PetscFunctionReturn(0); 1178 } 1179 1180 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1181 { 1182 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1183 PetscErrorCode ierr; 1184 1185 PetscFunctionBegin; 1186 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1187 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1192 { 1193 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1194 PetscErrorCode ierr; 1195 1196 PetscFunctionBegin; 1197 #if defined(PETSC_USE_LOG) 1198 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1199 #endif 1200 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1201 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1202 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1203 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1204 #if defined(PETSC_USE_CTABLE) 1205 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1206 #else 1207 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1208 #endif 1209 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1210 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1211 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1212 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1213 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1214 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1215 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1216 1217 ierr = 
PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1226 #if defined(PETSC_HAVE_ELEMENTAL) 1227 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1228 #endif 1229 #if defined(PETSC_HAVE_HYPRE) 1230 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1232 #endif 1233 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1239 { 1240 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1241 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1242 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1243 PetscErrorCode ierr; 1244 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1245 int fd; 1246 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1247 PetscInt nzmax,*column_indices,j,k,col,*garray = 
aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1248 PetscScalar *column_values; 1249 PetscInt message_count,flowcontrolcount; 1250 FILE *file; 1251 1252 PetscFunctionBegin; 1253 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1254 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1255 nz = A->nz + B->nz; 1256 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1257 if (!rank) { 1258 header[0] = MAT_FILE_CLASSID; 1259 header[1] = mat->rmap->N; 1260 header[2] = mat->cmap->N; 1261 1262 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1263 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1264 /* get largest number of rows any processor has */ 1265 rlen = mat->rmap->n; 1266 range = mat->rmap->range; 1267 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1268 } else { 1269 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1270 rlen = mat->rmap->n; 1271 } 1272 1273 /* load up the local row counts */ 1274 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1275 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1276 1277 /* store the row lengths to the file */ 1278 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1279 if (!rank) { 1280 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1281 for (i=1; i<size; i++) { 1282 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1283 rlen = range[i+1] - range[i]; 1284 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1285 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1286 } 1287 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1288 } else 
{ 1289 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1290 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1291 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1292 } 1293 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1294 1295 /* load up the local column indices */ 1296 nzmax = nz; /* th processor needs space a largest processor needs */ 1297 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1298 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1299 cnt = 0; 1300 for (i=0; i<mat->rmap->n; i++) { 1301 for (j=B->i[i]; j<B->i[i+1]; j++) { 1302 if ((col = garray[B->j[j]]) > cstart) break; 1303 column_indices[cnt++] = col; 1304 } 1305 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1306 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1307 } 1308 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1309 1310 /* store the column indices to the file */ 1311 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1312 if (!rank) { 1313 MPI_Status status; 1314 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1315 for (i=1; i<size; i++) { 1316 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1317 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1318 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1319 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1320 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1321 } 1322 ierr = 
PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1323 } else { 1324 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1325 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1326 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1327 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1328 } 1329 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1330 1331 /* load up the local column values */ 1332 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1333 cnt = 0; 1334 for (i=0; i<mat->rmap->n; i++) { 1335 for (j=B->i[i]; j<B->i[i+1]; j++) { 1336 if (garray[B->j[j]] > cstart) break; 1337 column_values[cnt++] = B->a[j]; 1338 } 1339 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1340 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1341 } 1342 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1343 1344 /* store the column values to the file */ 1345 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1346 if (!rank) { 1347 MPI_Status status; 1348 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1349 for (i=1; i<size; i++) { 1350 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1351 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1352 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1353 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1354 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1355 } 1356 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1357 
} else { 1358 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1359 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1360 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1361 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1362 } 1363 ierr = PetscFree(column_values);CHKERRQ(ierr); 1364 1365 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1366 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #include <petscdraw.h> 1371 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1372 { 1373 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1374 PetscErrorCode ierr; 1375 PetscMPIInt rank = aij->rank,size = aij->size; 1376 PetscBool isdraw,iascii,isbinary; 1377 PetscViewer sviewer; 1378 PetscViewerFormat format; 1379 1380 PetscFunctionBegin; 1381 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1382 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1383 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1384 if (iascii) { 1385 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1386 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1387 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1388 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1389 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1390 for (i=0; i<(PetscInt)size; i++) { 1391 nmax = PetscMax(nmax,nz[i]); 1392 nmin = PetscMin(nmin,nz[i]); 1393 navg += nz[i]; 1394 } 1395 ierr = PetscFree(nz);CHKERRQ(ierr); 1396 navg = navg/size; 1397 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - 
Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } 1400 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1401 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1402 MatInfo info; 1403 PetscBool inodes; 1404 1405 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1406 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1407 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1408 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1409 if (!inodes) { 1410 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1411 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1412 } else { 1413 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1414 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1415 } 1416 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1418 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1420 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1421 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1422 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1423 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1424 PetscFunctionReturn(0); 1425 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1426 PetscInt inodecount,inodelimit,*inodes; 1427 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1428 if 
(inodes) { 1429 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1430 } else { 1431 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1432 } 1433 PetscFunctionReturn(0); 1434 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1435 PetscFunctionReturn(0); 1436 } 1437 } else if (isbinary) { 1438 if (size == 1) { 1439 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1440 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1441 } else { 1442 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1443 } 1444 PetscFunctionReturn(0); 1445 } else if (isdraw) { 1446 PetscDraw draw; 1447 PetscBool isnull; 1448 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1449 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1450 if (isnull) PetscFunctionReturn(0); 1451 } 1452 1453 { 1454 /* assemble the entire matrix onto first processor. 
*/ 1455 Mat A; 1456 Mat_SeqAIJ *Aloc; 1457 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1458 MatScalar *a; 1459 1460 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1461 if (!rank) { 1462 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1463 } else { 1464 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1465 } 1466 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1467 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1468 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1469 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1470 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1471 1472 /* copy over the A part */ 1473 Aloc = (Mat_SeqAIJ*)aij->A->data; 1474 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1475 row = mat->rmap->rstart; 1476 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1477 for (i=0; i<m; i++) { 1478 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1479 row++; 1480 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1481 } 1482 aj = Aloc->j; 1483 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1484 1485 /* copy over the B part */ 1486 Aloc = (Mat_SeqAIJ*)aij->B->data; 1487 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1488 row = mat->rmap->rstart; 1489 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1490 ct = cols; 1491 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1492 for (i=0; i<m; i++) { 1493 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1494 row++; 1495 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1496 } 1497 ierr = PetscFree(ct);CHKERRQ(ierr); 1498 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1499 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1500 /* 1501 Everyone has to call to draw the matrix since the graphics waits are 1502 synchronized across all processors that share the 
PetscDraw object 1503 */ 1504 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1505 if (!rank) { 1506 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1507 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1508 } 1509 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1510 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1511 ierr = MatDestroy(&A);CHKERRQ(ierr); 1512 } 1513 PetscFunctionReturn(0); 1514 } 1515 1516 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1517 { 1518 PetscErrorCode ierr; 1519 PetscBool iascii,isdraw,issocket,isbinary; 1520 1521 PetscFunctionBegin; 1522 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1523 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1524 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1525 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1526 if (iascii || isdraw || isbinary || issocket) { 1527 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1528 } 1529 PetscFunctionReturn(0); 1530 } 1531 1532 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1533 { 1534 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1535 PetscErrorCode ierr; 1536 Vec bb1 = 0; 1537 PetscBool hasop; 1538 1539 PetscFunctionBegin; 1540 if (flag == SOR_APPLY_UPPER) { 1541 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1542 PetscFunctionReturn(0); 1543 } 1544 1545 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1546 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1547 } 1548 1549 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1550 if (flag & SOR_ZERO_INITIAL_GUESS) 
{ 1551 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1552 its--; 1553 } 1554 1555 while (its--) { 1556 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1557 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1558 1559 /* update rhs: bb1 = bb - B*x */ 1560 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1561 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1562 1563 /* local sweep */ 1564 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1565 } 1566 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1567 if (flag & SOR_ZERO_INITIAL_GUESS) { 1568 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1569 its--; 1570 } 1571 while (its--) { 1572 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1573 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1574 1575 /* update rhs: bb1 = bb - B*x */ 1576 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1577 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1578 1579 /* local sweep */ 1580 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1581 } 1582 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1583 if (flag & SOR_ZERO_INITIAL_GUESS) { 1584 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1585 its--; 1586 } 1587 while (its--) { 1588 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1589 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1590 1591 /* update rhs: bb1 = bb - B*x */ 1592 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1593 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1594 1595 /* local sweep */ 1596 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1597 } 1598 } else if (flag & SOR_EISENSTAT) { 1599 Vec xx1; 1600 1601 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1602 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1603 1604 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1605 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 if (!mat->diag) { 1607 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1608 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1609 } 1610 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1611 if (hasop) { 1612 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1613 } else { 1614 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1615 } 1616 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1617 1618 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1619 1620 /* local sweep */ 1621 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1622 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1623 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1624 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1625 1626 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1627 1628 matin->factorerrortype = mat->A->factorerrortype; 1629 PetscFunctionReturn(0); 1630 } 1631 1632 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1633 { 1634 Mat aA,aB,Aperm; 1635 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1636 PetscScalar *aa,*ba; 1637 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1638 PetscSF rowsf,sf; 1639 IS parcolp = NULL; 1640 PetscBool done; 1641 PetscErrorCode ierr; 1642 1643 PetscFunctionBegin; 1644 ierr = 
MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1645 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1646 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1647 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1648 1649 /* Invert row permutation to find out where my rows should go */ 1650 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1651 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1652 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1653 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1654 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1655 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1656 1657 /* Invert column permutation to find out where my columns should go */ 1658 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1659 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1660 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1661 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1662 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1663 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1664 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1665 1666 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1667 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1668 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1669 1670 /* Find out where my gcols should go */ 1671 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1672 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1673 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1674 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1675 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1676 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1677 ierr = 
PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1678 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1679 1680 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1681 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1682 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1683 for (i=0; i<m; i++) { 1684 PetscInt row = rdest[i],rowner; 1685 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1686 for (j=ai[i]; j<ai[i+1]; j++) { 1687 PetscInt cowner,col = cdest[aj[j]]; 1688 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1689 if (rowner == cowner) dnnz[i]++; 1690 else onnz[i]++; 1691 } 1692 for (j=bi[i]; j<bi[i+1]; j++) { 1693 PetscInt cowner,col = gcdest[bj[j]]; 1694 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1695 if (rowner == cowner) dnnz[i]++; 1696 else onnz[i]++; 1697 } 1698 } 1699 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1700 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1701 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1702 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1703 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1704 1705 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1706 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1707 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1708 for (i=0; i<m; i++) { 1709 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1710 PetscInt j0,rowlen; 1711 rowlen = ai[i+1] - ai[i]; 1712 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1713 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1714 ierr = 
MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1715 } 1716 rowlen = bi[i+1] - bi[i]; 1717 for (j0=j=0; j<rowlen; j0=j) { 1718 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1719 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1720 } 1721 } 1722 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1723 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1724 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1725 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1726 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1727 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1728 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1729 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1730 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1731 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1732 *B = Aperm; 1733 PetscFunctionReturn(0); 1734 } 1735 1736 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1737 { 1738 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1739 PetscErrorCode ierr; 1740 1741 PetscFunctionBegin; 1742 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1743 if (ghosts) *ghosts = aij->garray; 1744 PetscFunctionReturn(0); 1745 } 1746 1747 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1748 { 1749 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1750 Mat A = mat->A,B = mat->B; 1751 PetscErrorCode ierr; 1752 PetscReal isend[5],irecv[5]; 1753 1754 PetscFunctionBegin; 1755 info->block_size = 1.0; 1756 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1757 1758 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1759 isend[3] = info->memory; isend[4] = info->mallocs; 1760 1761 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1762 1763 isend[0] += info->nz_used; 
isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1764 isend[3] += info->memory; isend[4] += info->mallocs; 1765 if (flag == MAT_LOCAL) { 1766 info->nz_used = isend[0]; 1767 info->nz_allocated = isend[1]; 1768 info->nz_unneeded = isend[2]; 1769 info->memory = isend[3]; 1770 info->mallocs = isend[4]; 1771 } else if (flag == MAT_GLOBAL_MAX) { 1772 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1773 1774 info->nz_used = irecv[0]; 1775 info->nz_allocated = irecv[1]; 1776 info->nz_unneeded = irecv[2]; 1777 info->memory = irecv[3]; 1778 info->mallocs = irecv[4]; 1779 } else if (flag == MAT_GLOBAL_SUM) { 1780 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1781 1782 info->nz_used = irecv[0]; 1783 info->nz_allocated = irecv[1]; 1784 info->nz_unneeded = irecv[2]; 1785 info->memory = irecv[3]; 1786 info->mallocs = irecv[4]; 1787 } 1788 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1789 info->fill_ratio_needed = 0; 1790 info->factor_mallocs = 0; 1791 PetscFunctionReturn(0); 1792 } 1793 1794 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1795 { 1796 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1797 PetscErrorCode ierr; 1798 1799 PetscFunctionBegin; 1800 switch (op) { 1801 case MAT_NEW_NONZERO_LOCATIONS: 1802 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1803 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1804 case MAT_KEEP_NONZERO_PATTERN: 1805 case MAT_NEW_NONZERO_LOCATION_ERR: 1806 case MAT_USE_INODES: 1807 case MAT_IGNORE_ZERO_ENTRIES: 1808 MatCheckPreallocated(A,1); 1809 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1810 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1811 break; 1812 case MAT_ROW_ORIENTED: 1813 MatCheckPreallocated(A,1); 1814 a->roworiented = flg; 1815 1816 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1817 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1818 break; 1819 case MAT_NEW_DIAGONALS: 1820 ierr = 
PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1821 break; 1822 case MAT_IGNORE_OFF_PROC_ENTRIES: 1823 a->donotstash = flg; 1824 break; 1825 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1826 case MAT_SPD: 1827 case MAT_SYMMETRIC: 1828 case MAT_STRUCTURALLY_SYMMETRIC: 1829 case MAT_HERMITIAN: 1830 case MAT_SYMMETRY_ETERNAL: 1831 break; 1832 case MAT_SUBMAT_SINGLEIS: 1833 A->submat_singleis = flg; 1834 break; 1835 case MAT_STRUCTURE_ONLY: 1836 /* The option is handled directly by MatSetOption() */ 1837 break; 1838 case MAT_REUSE: 1839 /* The option is handled directly by MatSetOption() */ 1840 break; 1841 default: 1842 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1843 } 1844 PetscFunctionReturn(0); 1845 } 1846 1847 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1848 { 1849 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1850 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1851 PetscErrorCode ierr; 1852 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1853 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1854 PetscInt *cmap,*idx_p; 1855 1856 PetscFunctionBegin; 1857 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1858 mat->getrowactive = PETSC_TRUE; 1859 1860 if (!mat->rowvalues && (idx || v)) { 1861 /* 1862 allocate enough space to hold information from the longest row. 
1863 */ 1864 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1865 PetscInt max = 1,tmp; 1866 for (i=0; i<matin->rmap->n; i++) { 1867 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1868 if (max < tmp) max = tmp; 1869 } 1870 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1871 } 1872 1873 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1874 lrow = row - rstart; 1875 1876 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1877 if (!v) {pvA = 0; pvB = 0;} 1878 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1879 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1880 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1881 nztot = nzA + nzB; 1882 1883 cmap = mat->garray; 1884 if (v || idx) { 1885 if (nztot) { 1886 /* Sort by increasing column numbers, assuming A and B already sorted */ 1887 PetscInt imark = -1; 1888 if (v) { 1889 *v = v_p = mat->rowvalues; 1890 for (i=0; i<nzB; i++) { 1891 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1892 else break; 1893 } 1894 imark = i; 1895 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1896 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1897 } 1898 if (idx) { 1899 *idx = idx_p = mat->rowindices; 1900 if (imark > -1) { 1901 for (i=0; i<imark; i++) { 1902 idx_p[i] = cmap[cworkB[i]]; 1903 } 1904 } else { 1905 for (i=0; i<nzB; i++) { 1906 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1907 else break; 1908 } 1909 imark = i; 1910 } 1911 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1912 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1913 } 1914 } else { 1915 if (idx) *idx = 0; 1916 if (v) *v = 0; 1917 } 1918 } 1919 *nz = nztot; 1920 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1921 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1922 PetscFunctionReturn(0); 1923 } 1924 1925 
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1926 { 1927 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1928 1929 PetscFunctionBegin; 1930 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1931 aij->getrowactive = PETSC_FALSE; 1932 PetscFunctionReturn(0); 1933 } 1934 1935 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1936 { 1937 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1938 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1939 PetscErrorCode ierr; 1940 PetscInt i,j,cstart = mat->cmap->rstart; 1941 PetscReal sum = 0.0; 1942 MatScalar *v; 1943 1944 PetscFunctionBegin; 1945 if (aij->size == 1) { 1946 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1947 } else { 1948 if (type == NORM_FROBENIUS) { 1949 v = amat->a; 1950 for (i=0; i<amat->nz; i++) { 1951 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1952 } 1953 v = bmat->a; 1954 for (i=0; i<bmat->nz; i++) { 1955 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1956 } 1957 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1958 *norm = PetscSqrtReal(*norm); 1959 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1960 } else if (type == NORM_1) { /* max column norm */ 1961 PetscReal *tmp,*tmp2; 1962 PetscInt *jj,*garray = aij->garray; 1963 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1964 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1965 *norm = 0.0; 1966 v = amat->a; jj = amat->j; 1967 for (j=0; j<amat->nz; j++) { 1968 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1969 } 1970 v = bmat->a; jj = bmat->j; 1971 for (j=0; j<bmat->nz; j++) { 1972 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1973 } 1974 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1975 for (j=0; j<mat->cmap->N; j++) { 1976 if (tmp2[j] > *norm) 
*norm = tmp2[j]; 1977 } 1978 ierr = PetscFree(tmp);CHKERRQ(ierr); 1979 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1980 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1981 } else if (type == NORM_INFINITY) { /* max row norm */ 1982 PetscReal ntemp = 0.0; 1983 for (j=0; j<aij->A->rmap->n; j++) { 1984 v = amat->a + amat->i[j]; 1985 sum = 0.0; 1986 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1987 sum += PetscAbsScalar(*v); v++; 1988 } 1989 v = bmat->a + bmat->i[j]; 1990 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1991 sum += PetscAbsScalar(*v); v++; 1992 } 1993 if (sum > ntemp) ntemp = sum; 1994 } 1995 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1996 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1997 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1998 } 1999 PetscFunctionReturn(0); 2000 } 2001 2002 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2003 { 2004 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2005 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2006 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol; 2007 PetscErrorCode ierr; 2008 Mat B,A_diag,*B_diag; 2009 MatScalar *array; 2010 2011 PetscFunctionBegin; 2012 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2013 ai = Aloc->i; aj = Aloc->j; 2014 bi = Bloc->i; bj = Bloc->j; 2015 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2016 PetscInt *d_nnz,*g_nnz,*o_nnz; 2017 PetscSFNode *oloc; 2018 PETSC_UNUSED PetscSF sf; 2019 2020 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2021 /* compute d_nnz for preallocation */ 2022 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2023 for (i=0; i<ai[ma]; i++) { 2024 d_nnz[aj[i]]++; 2025 } 2026 /* compute local off-diagonal contributions */ 
2027 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2028 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2029 /* map those to global */ 2030 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2031 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2032 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2033 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2034 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2035 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2036 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2037 2038 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2039 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2040 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2041 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2042 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2043 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2044 } else { 2045 B = *matout; 2046 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2047 } 2048 2049 b = (Mat_MPIAIJ*)B->data; 2050 A_diag = a->A; 2051 B_diag = &b->A; 2052 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2053 A_diag_ncol = A_diag->cmap->N; 2054 B_diag_ilen = sub_B_diag->ilen; 2055 B_diag_i = sub_B_diag->i; 2056 2057 /* Set ilen for diagonal of B */ 2058 for (i=0; i<A_diag_ncol; i++) { 2059 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2060 } 2061 2062 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2063 very quickly (=without using MatSetValues), because all writes are local. 
*/ 2064 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2065 2066 /* copy over the B part */ 2067 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2068 array = Bloc->a; 2069 row = A->rmap->rstart; 2070 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2071 cols_tmp = cols; 2072 for (i=0; i<mb; i++) { 2073 ncol = bi[i+1]-bi[i]; 2074 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2075 row++; 2076 array += ncol; cols_tmp += ncol; 2077 } 2078 ierr = PetscFree(cols);CHKERRQ(ierr); 2079 2080 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2081 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2082 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2083 *matout = B; 2084 } else { 2085 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2086 } 2087 PetscFunctionReturn(0); 2088 } 2089 2090 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2091 { 2092 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2093 Mat a = aij->A,b = aij->B; 2094 PetscErrorCode ierr; 2095 PetscInt s1,s2,s3; 2096 2097 PetscFunctionBegin; 2098 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2099 if (rr) { 2100 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2101 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2102 /* Overlap communication with computation. 
*/ 2103 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2104 } 2105 if (ll) { 2106 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2107 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2108 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2109 } 2110 /* scale the diagonal block */ 2111 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2112 2113 if (rr) { 2114 /* Do a scatter end and then right scale the off-diagonal block */ 2115 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2116 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2117 } 2118 PetscFunctionReturn(0); 2119 } 2120 2121 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2122 { 2123 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2124 PetscErrorCode ierr; 2125 2126 PetscFunctionBegin; 2127 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2128 PetscFunctionReturn(0); 2129 } 2130 2131 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2132 { 2133 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2134 Mat a,b,c,d; 2135 PetscBool flg; 2136 PetscErrorCode ierr; 2137 2138 PetscFunctionBegin; 2139 a = matA->A; b = matA->B; 2140 c = matB->A; d = matB->B; 2141 2142 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2143 if (flg) { 2144 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2145 } 2146 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2147 PetscFunctionReturn(0); 2148 } 2149 2150 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2151 { 2152 PetscErrorCode ierr; 2153 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2154 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2155 2156 PetscFunctionBegin; 2157 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2158 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2159 /* because of the column compression in the off-processor part of the matrix a->B, 2160 the number of columns in a->B and b->B may be different, hence we cannot call 2161 the MatCopy() directly on the two parts. If need be, we can provide a more 2162 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2163 then copying the submatrices */ 2164 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2165 } else { 2166 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2167 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2168 } 2169 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2170 PetscFunctionReturn(0); 2171 } 2172 2173 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2174 { 2175 PetscErrorCode ierr; 2176 2177 PetscFunctionBegin; 2178 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2179 PetscFunctionReturn(0); 2180 } 2181 2182 /* 2183 Computes the number of nonzeros per row needed for preallocation when X and Y 2184 have different nonzero structure. 
2185 */ 2186 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2187 { 2188 PetscInt i,j,k,nzx,nzy; 2189 2190 PetscFunctionBegin; 2191 /* Set the number of nonzeros in the new matrix */ 2192 for (i=0; i<m; i++) { 2193 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2194 nzx = xi[i+1] - xi[i]; 2195 nzy = yi[i+1] - yi[i]; 2196 nnz[i] = 0; 2197 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2198 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2199 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2200 nnz[i]++; 2201 } 2202 for (; k<nzy; k++) nnz[i]++; 2203 } 2204 PetscFunctionReturn(0); 2205 } 2206 2207 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2208 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2209 { 2210 PetscErrorCode ierr; 2211 PetscInt m = Y->rmap->N; 2212 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2213 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2214 2215 PetscFunctionBegin; 2216 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2217 PetscFunctionReturn(0); 2218 } 2219 2220 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2221 { 2222 PetscErrorCode ierr; 2223 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2224 PetscBLASInt bnz,one=1; 2225 Mat_SeqAIJ *x,*y; 2226 2227 PetscFunctionBegin; 2228 if (str == SAME_NONZERO_PATTERN) { 2229 PetscScalar alpha = a; 2230 x = (Mat_SeqAIJ*)xx->A->data; 2231 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2232 y = (Mat_SeqAIJ*)yy->A->data; 2233 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2234 x = (Mat_SeqAIJ*)xx->B->data; 2235 y = (Mat_SeqAIJ*)yy->B->data; 2236 ierr = 
PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2237 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2238 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2239 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2240 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2241 } else { 2242 Mat B; 2243 PetscInt *nnz_d,*nnz_o; 2244 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2245 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2246 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2247 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2248 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2249 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2250 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2251 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2252 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2253 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2254 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2255 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2256 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2257 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2258 } 2259 PetscFunctionReturn(0); 2260 } 2261 2262 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2263 2264 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2265 { 2266 #if defined(PETSC_USE_COMPLEX) 2267 PetscErrorCode ierr; 2268 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2269 2270 PetscFunctionBegin; 2271 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2272 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2273 #else 2274 PetscFunctionBegin; 2275 #endif 2276 PetscFunctionReturn(0); 2277 } 2278 2279 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2280 { 2281 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2282 PetscErrorCode ierr; 2283 2284 PetscFunctionBegin; 2285 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 2286 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2287 PetscFunctionReturn(0); 2288 } 2289 2290 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2291 { 2292 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2293 PetscErrorCode ierr; 2294 2295 PetscFunctionBegin; 2296 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2297 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2298 PetscFunctionReturn(0); 2299 } 2300 2301 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2302 { 2303 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2304 PetscErrorCode ierr; 2305 PetscInt i,*idxb = 0; 2306 PetscScalar *va,*vb; 2307 Vec vtmp; 2308 2309 PetscFunctionBegin; 2310 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2311 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2312 if (idx) { 2313 for (i=0; i<A->rmap->n; i++) { 2314 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2315 } 2316 } 2317 2318 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2319 if (idx) { 2320 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2321 } 2322 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2323 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2324 2325 for (i=0; i<A->rmap->n; i++) { 2326 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2327 va[i] = vb[i]; 2328 if (idx) idx[i] = a->garray[idxb[i]]; 2329 } 2330 } 2331 2332 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2333 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2334 ierr = PetscFree(idxb);CHKERRQ(ierr); 2335 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2336 PetscFunctionReturn(0); 2337 } 2338 2339 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2340 { 2341 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2342 PetscErrorCode ierr; 2343 PetscInt i,*idxb = 0; 2344 PetscScalar *va,*vb; 2345 Vec vtmp; 2346 2347 PetscFunctionBegin; 2348 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2349 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2350 if (idx) { 2351 for (i=0; i<A->cmap->n; i++) { 2352 if 
(PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2353 } 2354 } 2355 2356 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2357 if (idx) { 2358 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2359 } 2360 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2361 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2362 2363 for (i=0; i<A->rmap->n; i++) { 2364 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2365 va[i] = vb[i]; 2366 if (idx) idx[i] = a->garray[idxb[i]]; 2367 } 2368 } 2369 2370 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2371 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2372 ierr = PetscFree(idxb);CHKERRQ(ierr); 2373 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2374 PetscFunctionReturn(0); 2375 } 2376 2377 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2378 { 2379 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2380 PetscInt n = A->rmap->n; 2381 PetscInt cstart = A->cmap->rstart; 2382 PetscInt *cmap = mat->garray; 2383 PetscInt *diagIdx, *offdiagIdx; 2384 Vec diagV, offdiagV; 2385 PetscScalar *a, *diagA, *offdiagA; 2386 PetscInt r; 2387 PetscErrorCode ierr; 2388 2389 PetscFunctionBegin; 2390 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2391 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2392 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2393 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2394 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2395 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2396 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2397 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2398 for (r = 0; r < n; ++r) { 2399 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2400 a[r] = diagA[r]; 2401 idx[r] = cstart + diagIdx[r]; 2402 } else { 2403 a[r] = offdiagA[r]; 2404 idx[r] = cmap[offdiagIdx[r]]; 2405 } 2406 } 2407 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2408 ierr = 
VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2409 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2410 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2411 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2412 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2413 PetscFunctionReturn(0); 2414 } 2415 2416 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2417 { 2418 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2419 PetscInt n = A->rmap->n; 2420 PetscInt cstart = A->cmap->rstart; 2421 PetscInt *cmap = mat->garray; 2422 PetscInt *diagIdx, *offdiagIdx; 2423 Vec diagV, offdiagV; 2424 PetscScalar *a, *diagA, *offdiagA; 2425 PetscInt r; 2426 PetscErrorCode ierr; 2427 2428 PetscFunctionBegin; 2429 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2430 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2431 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2432 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2433 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2434 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2435 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2436 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2437 for (r = 0; r < n; ++r) { 2438 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2439 a[r] = diagA[r]; 2440 idx[r] = cstart + diagIdx[r]; 2441 } else { 2442 a[r] = offdiagA[r]; 2443 idx[r] = cmap[offdiagIdx[r]]; 2444 } 2445 } 2446 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2447 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2448 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2449 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2450 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2451 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2456 { 2457 PetscErrorCode ierr; 2458 Mat *dummy; 2459 2460 PetscFunctionBegin; 2461 ierr = 
MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2462 *newmat = *dummy; 2463 ierr = PetscFree(dummy);CHKERRQ(ierr); 2464 PetscFunctionReturn(0); 2465 } 2466 2467 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2468 { 2469 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2470 PetscErrorCode ierr; 2471 2472 PetscFunctionBegin; 2473 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2474 A->factorerrortype = a->A->factorerrortype; 2475 PetscFunctionReturn(0); 2476 } 2477 2478 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2479 { 2480 PetscErrorCode ierr; 2481 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2482 2483 PetscFunctionBegin; 2484 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2485 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2486 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2487 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2488 PetscFunctionReturn(0); 2489 } 2490 2491 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2492 { 2493 PetscFunctionBegin; 2494 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2495 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2496 PetscFunctionReturn(0); 2497 } 2498 2499 /*@ 2500 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2501 2502 Collective on Mat 2503 2504 Input Parameters: 2505 + A - the matrix 2506 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2507 2508 Level: advanced 2509 2510 @*/ 2511 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2512 { 2513 PetscErrorCode ierr; 2514 2515 PetscFunctionBegin; 2516 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2517 PetscFunctionReturn(0); 2518 } 2519 2520 PetscErrorCode 
MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2521 { 2522 PetscErrorCode ierr; 2523 PetscBool sc = PETSC_FALSE,flg; 2524 2525 PetscFunctionBegin; 2526 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2527 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2528 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2529 if (flg) { 2530 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2531 } 2532 ierr = PetscOptionsTail();CHKERRQ(ierr); 2533 PetscFunctionReturn(0); 2534 } 2535 2536 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2537 { 2538 PetscErrorCode ierr; 2539 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2540 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2541 2542 PetscFunctionBegin; 2543 if (!Y->preallocated) { 2544 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2545 } else if (!aij->nz) { 2546 PetscInt nonew = aij->nonew; 2547 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2548 aij->nonew = nonew; 2549 } 2550 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2551 PetscFunctionReturn(0); 2552 } 2553 2554 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2555 { 2556 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2557 PetscErrorCode ierr; 2558 2559 PetscFunctionBegin; 2560 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2561 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2562 if (d) { 2563 PetscInt rstart; 2564 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2565 *d += rstart; 2566 2567 } 2568 PetscFunctionReturn(0); 2569 } 2570 2571 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2572 { 2573 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2574 PetscErrorCode ierr; 2575 2576 
PetscFunctionBegin; 2577 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2578 PetscFunctionReturn(0); 2579 } 2580 2581 /* -------------------------------------------------------------------*/ 2582 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2583 MatGetRow_MPIAIJ, 2584 MatRestoreRow_MPIAIJ, 2585 MatMult_MPIAIJ, 2586 /* 4*/ MatMultAdd_MPIAIJ, 2587 MatMultTranspose_MPIAIJ, 2588 MatMultTransposeAdd_MPIAIJ, 2589 0, 2590 0, 2591 0, 2592 /*10*/ 0, 2593 0, 2594 0, 2595 MatSOR_MPIAIJ, 2596 MatTranspose_MPIAIJ, 2597 /*15*/ MatGetInfo_MPIAIJ, 2598 MatEqual_MPIAIJ, 2599 MatGetDiagonal_MPIAIJ, 2600 MatDiagonalScale_MPIAIJ, 2601 MatNorm_MPIAIJ, 2602 /*20*/ MatAssemblyBegin_MPIAIJ, 2603 MatAssemblyEnd_MPIAIJ, 2604 MatSetOption_MPIAIJ, 2605 MatZeroEntries_MPIAIJ, 2606 /*24*/ MatZeroRows_MPIAIJ, 2607 0, 2608 0, 2609 0, 2610 0, 2611 /*29*/ MatSetUp_MPIAIJ, 2612 0, 2613 0, 2614 MatGetDiagonalBlock_MPIAIJ, 2615 0, 2616 /*34*/ MatDuplicate_MPIAIJ, 2617 0, 2618 0, 2619 0, 2620 0, 2621 /*39*/ MatAXPY_MPIAIJ, 2622 MatCreateSubMatrices_MPIAIJ, 2623 MatIncreaseOverlap_MPIAIJ, 2624 MatGetValues_MPIAIJ, 2625 MatCopy_MPIAIJ, 2626 /*44*/ MatGetRowMax_MPIAIJ, 2627 MatScale_MPIAIJ, 2628 MatShift_MPIAIJ, 2629 MatDiagonalSet_MPIAIJ, 2630 MatZeroRowsColumns_MPIAIJ, 2631 /*49*/ MatSetRandom_MPIAIJ, 2632 0, 2633 0, 2634 0, 2635 0, 2636 /*54*/ MatFDColoringCreate_MPIXAIJ, 2637 0, 2638 MatSetUnfactored_MPIAIJ, 2639 MatPermute_MPIAIJ, 2640 0, 2641 /*59*/ MatCreateSubMatrix_MPIAIJ, 2642 MatDestroy_MPIAIJ, 2643 MatView_MPIAIJ, 2644 0, 2645 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2646 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2647 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2648 0, 2649 0, 2650 0, 2651 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2652 MatGetRowMinAbs_MPIAIJ, 2653 0, 2654 0, 2655 0, 2656 0, 2657 /*75*/ MatFDColoringApply_AIJ, 2658 MatSetFromOptions_MPIAIJ, 2659 0, 2660 0, 2661 MatFindZeroDiagonals_MPIAIJ, 2662 /*80*/ 0, 2663 0, 2664 0, 2665 /*83*/ 
MatLoad_MPIAIJ, 2666 MatIsSymmetric_MPIAIJ, 2667 0, 2668 0, 2669 0, 2670 0, 2671 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2672 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2673 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2674 MatPtAP_MPIAIJ_MPIAIJ, 2675 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2676 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2677 0, 2678 0, 2679 0, 2680 0, 2681 /*99*/ 0, 2682 0, 2683 0, 2684 MatConjugate_MPIAIJ, 2685 0, 2686 /*104*/MatSetValuesRow_MPIAIJ, 2687 MatRealPart_MPIAIJ, 2688 MatImaginaryPart_MPIAIJ, 2689 0, 2690 0, 2691 /*109*/0, 2692 0, 2693 MatGetRowMin_MPIAIJ, 2694 0, 2695 MatMissingDiagonal_MPIAIJ, 2696 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2697 0, 2698 MatGetGhosts_MPIAIJ, 2699 0, 2700 0, 2701 /*119*/0, 2702 0, 2703 0, 2704 0, 2705 MatGetMultiProcBlock_MPIAIJ, 2706 /*124*/MatFindNonzeroRows_MPIAIJ, 2707 MatGetColumnNorms_MPIAIJ, 2708 MatInvertBlockDiagonal_MPIAIJ, 2709 MatInvertVariableBlockDiagonal_MPIAIJ, 2710 MatCreateSubMatricesMPI_MPIAIJ, 2711 /*129*/0, 2712 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2713 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2714 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2715 0, 2716 /*134*/0, 2717 0, 2718 MatRARt_MPIAIJ_MPIAIJ, 2719 0, 2720 0, 2721 /*139*/MatSetBlockSizes_MPIAIJ, 2722 0, 2723 0, 2724 MatFDColoringSetUp_MPIXAIJ, 2725 MatFindOffBlockDiagonalEntries_MPIAIJ, 2726 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2727 }; 2728 2729 /* ----------------------------------------------------------------------------------------*/ 2730 2731 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2732 { 2733 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2734 PetscErrorCode ierr; 2735 2736 PetscFunctionBegin; 2737 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2738 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2739 PetscFunctionReturn(0); 2740 } 2741 2742 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2743 { 2744 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2745 PetscErrorCode ierr; 2746 2747 PetscFunctionBegin; 2748 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2749 ierr 
= MatRetrieveValues(aij->B);CHKERRQ(ierr); 2750 PetscFunctionReturn(0); 2751 } 2752 2753 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2754 { 2755 Mat_MPIAIJ *b; 2756 PetscErrorCode ierr; 2757 2758 PetscFunctionBegin; 2759 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2760 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2761 b = (Mat_MPIAIJ*)B->data; 2762 2763 #if defined(PETSC_USE_CTABLE) 2764 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2765 #else 2766 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2767 #endif 2768 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2769 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2770 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2771 2772 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2773 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2774 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2775 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2776 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2777 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2778 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2779 2780 if (!B->preallocated) { 2781 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2782 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2783 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2784 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2785 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2786 } 2787 2788 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2789 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2790 B->preallocated = PETSC_TRUE; 2791 B->was_assembled = PETSC_FALSE; 2792 B->assembled = PETSC_FALSE;; 2793 PetscFunctionReturn(0); 2794 } 2795 2796 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2797 { 
2798 Mat_MPIAIJ *b; 2799 PetscErrorCode ierr; 2800 2801 PetscFunctionBegin; 2802 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2803 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2804 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2805 b = (Mat_MPIAIJ*)B->data; 2806 2807 #if defined(PETSC_USE_CTABLE) 2808 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2809 #else 2810 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2811 #endif 2812 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2813 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2814 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2815 2816 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2817 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2818 B->preallocated = PETSC_TRUE; 2819 B->was_assembled = PETSC_FALSE; 2820 B->assembled = PETSC_FALSE; 2821 PetscFunctionReturn(0); 2822 } 2823 2824 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2825 { 2826 Mat mat; 2827 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2828 PetscErrorCode ierr; 2829 2830 PetscFunctionBegin; 2831 *newmat = 0; 2832 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2833 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2834 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2835 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2836 a = (Mat_MPIAIJ*)mat->data; 2837 2838 mat->factortype = matin->factortype; 2839 mat->assembled = PETSC_TRUE; 2840 mat->insertmode = NOT_SET_VALUES; 2841 mat->preallocated = PETSC_TRUE; 2842 2843 a->size = oldmat->size; 2844 a->rank = oldmat->rank; 2845 a->donotstash = oldmat->donotstash; 2846 a->roworiented = oldmat->roworiented; 2847 a->rowindices = 0; 2848 a->rowvalues = 0; 2849 a->getrowactive = PETSC_FALSE; 2850 2851 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2852 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2853 2854 if 
(oldmat->colmap) { 2855 #if defined(PETSC_USE_CTABLE) 2856 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2857 #else 2858 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2859 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2860 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2861 #endif 2862 } else a->colmap = 0; 2863 if (oldmat->garray) { 2864 PetscInt len; 2865 len = oldmat->B->cmap->n; 2866 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2867 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2868 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2869 } else a->garray = 0; 2870 2871 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2872 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2873 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2874 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2875 2876 if (oldmat->Mvctx_mpi1) { 2877 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2878 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2879 } 2880 2881 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2882 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2883 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2884 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2885 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2886 *newmat = mat; 2887 PetscFunctionReturn(0); 2888 } 2889 2890 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2891 { 2892 PetscBool isbinary, ishdf5; 2893 PetscErrorCode ierr; 2894 2895 PetscFunctionBegin; 2896 
PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2897 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2898 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2899 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2900 if (isbinary) { 2901 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2902 } else if (ishdf5) { 2903 #if defined(PETSC_HAVE_HDF5) 2904 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2905 #else 2906 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2907 #endif 2908 } else { 2909 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2910 } 2911 PetscFunctionReturn(0); 2912 } 2913 2914 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2915 { 2916 PetscScalar *vals,*svals; 2917 MPI_Comm comm; 2918 PetscErrorCode ierr; 2919 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2920 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2921 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2922 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2923 PetscInt cend,cstart,n,*rowners; 2924 int fd; 2925 PetscInt bs = newMat->rmap->bs; 2926 2927 PetscFunctionBegin; 2928 /* force binary viewer to load .info file if it has not yet done so */ 2929 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2930 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2931 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2932 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2933 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2934 if (!rank) { 2935 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2936 if (header[0] != MAT_FILE_CLASSID) 
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2937 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2938 } 2939 2940 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2941 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2942 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2943 if (bs < 0) bs = 1; 2944 2945 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2946 M = header[1]; N = header[2]; 2947 2948 /* If global sizes are set, check if they are consistent with that given in the file */ 2949 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2950 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2951 2952 /* determine ownership of all (block) rows */ 2953 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2954 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2955 else m = newMat->rmap->n; /* Set by user */ 2956 2957 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2958 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2959 2960 /* First process needs enough room for process with most rows */ 2961 if (!rank) { 2962 mmax = rowners[1]; 2963 for (i=2; i<=size; i++) { 2964 mmax = PetscMax(mmax, rowners[i]); 2965 } 2966 } else mmax = -1; /* unused, but compilers complain */ 2967 2968 rowners[0] = 0; 2969 for (i=2; i<=size; i++) { 2970 rowners[i] += rowners[i-1]; 2971 } 2972 rstart = 
rowners[rank]; 2973 rend = rowners[rank+1]; 2974 2975 /* distribute row lengths to all processors */ 2976 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2977 if (!rank) { 2978 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2979 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2980 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2981 for (j=0; j<m; j++) { 2982 procsnz[0] += ourlens[j]; 2983 } 2984 for (i=1; i<size; i++) { 2985 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2986 /* calculate the number of nonzeros on each processor */ 2987 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2988 procsnz[i] += rowlengths[j]; 2989 } 2990 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2991 } 2992 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2993 } else { 2994 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2995 } 2996 2997 if (!rank) { 2998 /* determine max buffer needed and allocate it */ 2999 maxnz = 0; 3000 for (i=0; i<size; i++) { 3001 maxnz = PetscMax(maxnz,procsnz[i]); 3002 } 3003 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3004 3005 /* read in my part of the matrix column indices */ 3006 nz = procsnz[0]; 3007 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3008 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3009 3010 /* read in every one elses and ship off */ 3011 for (i=1; i<size; i++) { 3012 nz = procsnz[i]; 3013 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3014 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3015 } 3016 ierr = PetscFree(cols);CHKERRQ(ierr); 3017 } else { 3018 /* determine buffer space needed for message */ 3019 nz = 0; 3020 for (i=0; i<m; i++) { 3021 nz += ourlens[i]; 3022 } 3023 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3024 3025 /* receive message of column indices*/ 3026 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3027 } 3028 3029 /* 
determine column ownership if matrix is not square */ 3030 if (N != M) { 3031 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3032 else n = newMat->cmap->n; 3033 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3034 cstart = cend - n; 3035 } else { 3036 cstart = rstart; 3037 cend = rend; 3038 n = cend - cstart; 3039 } 3040 3041 /* loop over local rows, determining number of off diagonal entries */ 3042 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3043 jj = 0; 3044 for (i=0; i<m; i++) { 3045 for (j=0; j<ourlens[i]; j++) { 3046 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3047 jj++; 3048 } 3049 } 3050 3051 for (i=0; i<m; i++) { 3052 ourlens[i] -= offlens[i]; 3053 } 3054 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3055 3056 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3057 3058 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3059 3060 for (i=0; i<m; i++) { 3061 ourlens[i] += offlens[i]; 3062 } 3063 3064 if (!rank) { 3065 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3066 3067 /* read in my part of the matrix numerical values */ 3068 nz = procsnz[0]; 3069 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3070 3071 /* insert into matrix */ 3072 jj = rstart; 3073 smycols = mycols; 3074 svals = vals; 3075 for (i=0; i<m; i++) { 3076 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3077 smycols += ourlens[i]; 3078 svals += ourlens[i]; 3079 jj++; 3080 } 3081 3082 /* read in other processors and ship out */ 3083 for (i=1; i<size; i++) { 3084 nz = procsnz[i]; 3085 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3086 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3087 } 3088 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3089 } else { 3090 /* receive numeric values */ 3091 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3092 3093 /* receive message of 
values*/ 3094 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3095 3096 /* insert into matrix */ 3097 jj = rstart; 3098 smycols = mycols; 3099 svals = vals; 3100 for (i=0; i<m; i++) { 3101 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3102 smycols += ourlens[i]; 3103 svals += ourlens[i]; 3104 jj++; 3105 } 3106 } 3107 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3108 ierr = PetscFree(vals);CHKERRQ(ierr); 3109 ierr = PetscFree(mycols);CHKERRQ(ierr); 3110 ierr = PetscFree(rowners);CHKERRQ(ierr); 3111 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3112 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3113 PetscFunctionReturn(0); 3114 } 3115 3116 /* Not scalable because of ISAllGather() unless getting all columns. */ 3117 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3118 { 3119 PetscErrorCode ierr; 3120 IS iscol_local; 3121 PetscBool isstride; 3122 PetscMPIInt lisstride=0,gisstride; 3123 3124 PetscFunctionBegin; 3125 /* check if we are grabbing all columns*/ 3126 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3127 3128 if (isstride) { 3129 PetscInt start,len,mstart,mlen; 3130 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3131 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3132 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3133 if (mstart == start && mlen-mstart == len) lisstride = 1; 3134 } 3135 3136 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3137 if (gisstride) { 3138 PetscInt N; 3139 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3140 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3141 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3142 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping 
ISAllGather()\n");CHKERRQ(ierr); 3143 } else { 3144 PetscInt cbs; 3145 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3146 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3147 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3148 } 3149 3150 *isseq = iscol_local; 3151 PetscFunctionReturn(0); 3152 } 3153 3154 /* 3155 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3156 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3157 3158 Input Parameters: 3159 mat - matrix 3160 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3161 i.e., mat->rstart <= isrow[i] < mat->rend 3162 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3163 i.e., mat->cstart <= iscol[i] < mat->cend 3164 Output Parameter: 3165 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3166 iscol_o - sequential column index set for retrieving mat->B 3167 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3168 */ 3169 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3170 { 3171 PetscErrorCode ierr; 3172 Vec x,cmap; 3173 const PetscInt *is_idx; 3174 PetscScalar *xarray,*cmaparray; 3175 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3176 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3177 Mat B=a->B; 3178 Vec lvec=a->lvec,lcmap; 3179 PetscInt i,cstart,cend,Bn=B->cmap->N; 3180 MPI_Comm comm; 3181 VecScatter Mvctx=a->Mvctx; 3182 3183 PetscFunctionBegin; 3184 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3185 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3186 3187 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3188 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3189 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3190 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3191 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3192 3193 /* Get start indices */ 3194 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3195 isstart -= ncols; 3196 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3197 3198 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3199 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3200 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3201 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3202 for (i=0; i<ncols; i++) { 3203 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3204 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3205 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3206 } 3207 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3208 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3209 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3210 3211 /* Get iscol_d */ 3212 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3213 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3214 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3215 3216 /* Get isrow_d */ 3217 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3218 rstart = mat->rmap->rstart; 3219 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3220 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3221 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3222 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3223 3224 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3225 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3226 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3227 3228 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3229 ierr = 
VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3230 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3231 3232 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3233 3234 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3235 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3236 3237 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3238 /* off-process column indices */ 3239 count = 0; 3240 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3241 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3242 3243 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3244 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3245 for (i=0; i<Bn; i++) { 3246 if (PetscRealPart(xarray[i]) > -1.0) { 3247 idx[count] = i; /* local column index in off-diagonal part B */ 3248 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3249 count++; 3250 } 3251 } 3252 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3253 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3254 3255 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3256 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3257 3258 ierr = PetscFree(idx);CHKERRQ(ierr); 3259 *garray = cmap1; 3260 3261 ierr = VecDestroy(&x);CHKERRQ(ierr); 3262 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3263 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3264 PetscFunctionReturn(0); 3265 } 3266 3267 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3268 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3269 { 3270 PetscErrorCode ierr; 3271 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3272 Mat M = NULL; 3273 MPI_Comm comm; 3274 IS iscol_d,isrow_d,iscol_o; 3275 Mat Asub = NULL,Bsub = NULL; 3276 PetscInt n; 3277 3278 PetscFunctionBegin; 3279 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3280 3281 if (call == MAT_REUSE_MATRIX) { 3282 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3283 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3284 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3285 3286 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3287 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3288 3289 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3290 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3291 3292 /* Update diagonal and off-diagonal portions of submat */ 3293 asub = (Mat_MPIAIJ*)(*submat)->data; 3294 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3295 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3296 if (n) { 3297 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3298 } 3299 ierr = 
MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3300 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3301 3302 } else { /* call == MAT_INITIAL_MATRIX) */ 3303 const PetscInt *garray; 3304 PetscInt BsubN; 3305 3306 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3307 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3308 3309 /* Create local submatrices Asub and Bsub */ 3310 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3311 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3312 3313 /* Create submatrix M */ 3314 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3315 3316 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3317 asub = (Mat_MPIAIJ*)M->data; 3318 3319 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3320 n = asub->B->cmap->N; 3321 if (BsubN > n) { 3322 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3323 const PetscInt *idx; 3324 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3325 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3326 3327 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3328 j = 0; 3329 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3330 for (i=0; i<n; i++) { 3331 if (j >= BsubN) break; 3332 while (subgarray[i] > garray[j]) j++; 3333 3334 if (subgarray[i] == garray[j]) { 3335 idx_new[i] = idx[j++]; 3336 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3337 } 3338 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3339 3340 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3341 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3342 3343 } else if (BsubN < n) { 3344 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3345 } 3346 3347 ierr = PetscFree(garray);CHKERRQ(ierr); 3348 *submat = M; 3349 3350 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3351 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3352 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3353 3354 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3355 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3356 3357 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3358 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3359 } 3360 PetscFunctionReturn(0); 3361 } 3362 3363 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3364 { 3365 PetscErrorCode ierr; 3366 IS iscol_local=NULL,isrow_d; 3367 PetscInt csize; 3368 PetscInt n,i,j,start,end; 3369 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3370 MPI_Comm comm; 3371 3372 PetscFunctionBegin; 3373 /* If isrow has same processor distribution as mat, 3374 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3375 if (call == MAT_REUSE_MATRIX) { 3376 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3377 if (isrow_d) { 3378 sameRowDist = PETSC_TRUE; 3379 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3380 } else { 3381 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3382 if (iscol_local) { 3383 sameRowDist = PETSC_TRUE; 3384 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3385 } 3386 } 3387 } else { 3388 /* Check if isrow has same processor distribution as mat */ 3389 sameDist[0] = PETSC_FALSE; 3390 ierr = 
ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3391 if (!n) { 3392 sameDist[0] = PETSC_TRUE; 3393 } else { 3394 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3395 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3396 if (i >= start && j < end) { 3397 sameDist[0] = PETSC_TRUE; 3398 } 3399 } 3400 3401 /* Check if iscol has same processor distribution as mat */ 3402 sameDist[1] = PETSC_FALSE; 3403 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3404 if (!n) { 3405 sameDist[1] = PETSC_TRUE; 3406 } else { 3407 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3408 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3409 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3410 } 3411 3412 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3413 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3414 sameRowDist = tsameDist[0]; 3415 } 3416 3417 if (sameRowDist) { 3418 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3419 /* isrow and iscol have same processor distribution as mat */ 3420 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3421 PetscFunctionReturn(0); 3422 } else { /* sameRowDist */ 3423 /* isrow has same processor distribution as mat */ 3424 if (call == MAT_INITIAL_MATRIX) { 3425 PetscBool sorted; 3426 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3427 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3428 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3429 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3430 3431 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3432 if (sorted) { 3433 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3434 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3435 
PetscFunctionReturn(0); 3436 } 3437 } else { /* call == MAT_REUSE_MATRIX */ 3438 IS iscol_sub; 3439 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3440 if (iscol_sub) { 3441 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3442 PetscFunctionReturn(0); 3443 } 3444 } 3445 } 3446 } 3447 3448 /* General case: iscol -> iscol_local which has global size of iscol */ 3449 if (call == MAT_REUSE_MATRIX) { 3450 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3451 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3452 } else { 3453 if (!iscol_local) { 3454 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3455 } 3456 } 3457 3458 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3459 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3460 3461 if (call == MAT_INITIAL_MATRIX) { 3462 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3463 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3464 } 3465 PetscFunctionReturn(0); 3466 } 3467 3468 /*@C 3469 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3470 and "off-diagonal" part of the matrix in CSR format. 3471 3472 Collective on MPI_Comm 3473 3474 Input Parameters: 3475 + comm - MPI communicator 3476 . A - "diagonal" portion of matrix 3477 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3478 - garray - global index of B columns 3479 3480 Output Parameter: 3481 . mat - the matrix, with input A as its local diagonal matrix 3482 Level: advanced 3483 3484 Notes: 3485 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 
3486 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3487 3488 .seealso: MatCreateMPIAIJWithSplitArrays() 3489 @*/ 3490 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3491 { 3492 PetscErrorCode ierr; 3493 Mat_MPIAIJ *maij; 3494 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3495 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3496 PetscScalar *oa=b->a; 3497 Mat Bnew; 3498 PetscInt m,n,N; 3499 3500 PetscFunctionBegin; 3501 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3502 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3503 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3504 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3505 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3506 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3507 3508 /* Get global columns of mat */ 3509 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3510 3511 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3512 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3513 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3514 maij = (Mat_MPIAIJ*)(*mat)->data; 3515 3516 (*mat)->preallocated = PETSC_TRUE; 3517 3518 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3519 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3520 3521 /* Set A as diagonal portion of *mat */ 3522 maij->A = A; 3523 3524 nz = oi[m]; 3525 for (i=0; i<nz; i++) { 3526 col = oj[i]; 3527 oj[i] = garray[col]; 3528 } 3529 3530 /* Set Bnew as off-diagonal portion of *mat */ 3531 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3532 bnew = 
(Mat_SeqAIJ*)Bnew->data; 3533 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3534 maij->B = Bnew; 3535 3536 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3537 3538 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3539 b->free_a = PETSC_FALSE; 3540 b->free_ij = PETSC_FALSE; 3541 ierr = MatDestroy(&B);CHKERRQ(ierr); 3542 3543 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3544 bnew->free_a = PETSC_TRUE; 3545 bnew->free_ij = PETSC_TRUE; 3546 3547 /* condense columns of maij->B */ 3548 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3549 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3550 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3551 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3552 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3553 PetscFunctionReturn(0); 3554 } 3555 3556 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3557 3558 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3559 { 3560 PetscErrorCode ierr; 3561 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3562 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3563 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3564 Mat M,Msub,B=a->B; 3565 MatScalar *aa; 3566 Mat_SeqAIJ *aij; 3567 PetscInt *garray = a->garray,*colsub,Ncols; 3568 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3569 IS iscol_sub,iscmap; 3570 const PetscInt *is_idx,*cmap; 3571 PetscBool allcolumns=PETSC_FALSE; 3572 MPI_Comm comm; 3573 3574 PetscFunctionBegin; 3575 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3576 3577 if (call == MAT_REUSE_MATRIX) { 3578 ierr = 
PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3579 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3580 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3581 3582 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3583 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3584 3585 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3586 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3587 3588 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3589 3590 } else { /* call == MAT_INITIAL_MATRIX) */ 3591 PetscBool flg; 3592 3593 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3594 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3595 3596 /* (1) iscol -> nonscalable iscol_local */ 3597 /* Check for special case: each processor gets entire matrix columns */ 3598 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3599 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3600 if (allcolumns) { 3601 iscol_sub = iscol_local; 3602 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3603 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3604 3605 } else { 3606 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3607 PetscInt *idx,*cmap1,k; 3608 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3609 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3610 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3611 count = 0; 3612 k = 0; 3613 for (i=0; i<Ncols; i++) { 3614 j = is_idx[i]; 3615 if (j >= cstart && j < cend) { 3616 /* diagonal part of mat */ 3617 idx[count] = j; 3618 cmap1[count++] = i; /* column index in submat */ 3619 } else if (Bn) { 3620 /* off-diagonal part of mat */ 3621 if (j == garray[k]) { 3622 idx[count] = j; 3623 cmap1[count++] = i; /* column index in submat */ 3624 } else if (j > garray[k]) { 3625 while (j > garray[k] && k < Bn-1) k++; 3626 if (j == garray[k]) { 3627 idx[count] = j; 3628 cmap1[count++] = i; /* column index in submat */ 3629 } 3630 } 3631 } 3632 } 3633 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3634 3635 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3636 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3637 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3638 3639 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3640 } 3641 3642 /* (3) Create sequential Msub */ 3643 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3644 } 3645 3646 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3647 aij = (Mat_SeqAIJ*)(Msub)->data; 3648 ii = aij->i; 3649 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3650 3651 /* 3652 m - number of local rows 3653 Ncols - number of columns (same on all processors) 3654 rstart - first row in new global matrix generated 3655 */ 3656 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3657 3658 if (call == MAT_INITIAL_MATRIX) { 3659 /* (4) Create parallel newmat */ 3660 PetscMPIInt rank,size; 3661 PetscInt csize; 3662 3663 
ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3664 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3665 3666 /* 3667 Determine the number of non-zeros in the diagonal and off-diagonal 3668 portions of the matrix in order to do correct preallocation 3669 */ 3670 3671 /* first get start and end of "diagonal" columns */ 3672 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3673 if (csize == PETSC_DECIDE) { 3674 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3675 if (mglobal == Ncols) { /* square matrix */ 3676 nlocal = m; 3677 } else { 3678 nlocal = Ncols/size + ((Ncols % size) > rank); 3679 } 3680 } else { 3681 nlocal = csize; 3682 } 3683 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3684 rstart = rend - nlocal; 3685 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3686 3687 /* next, compute all the lengths */ 3688 jj = aij->j; 3689 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3690 olens = dlens + m; 3691 for (i=0; i<m; i++) { 3692 jend = ii[i+1] - ii[i]; 3693 olen = 0; 3694 dlen = 0; 3695 for (j=0; j<jend; j++) { 3696 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3697 else dlen++; 3698 jj++; 3699 } 3700 olens[i] = olen; 3701 dlens[i] = dlen; 3702 } 3703 3704 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3705 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3706 3707 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3708 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3709 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3710 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3711 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3712 ierr = PetscFree(dlens);CHKERRQ(ierr); 3713 3714 } else { /* call == MAT_REUSE_MATRIX */ 3715 M = *newmat; 3716 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3717 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be 
same size/layout as request"); 3718 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3719 /* 3720 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3721 rather than the slower MatSetValues(). 3722 */ 3723 M->was_assembled = PETSC_TRUE; 3724 M->assembled = PETSC_FALSE; 3725 } 3726 3727 /* (5) Set values of Msub to *newmat */ 3728 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3729 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3730 3731 jj = aij->j; 3732 aa = aij->a; 3733 for (i=0; i<m; i++) { 3734 row = rstart + i; 3735 nz = ii[i+1] - ii[i]; 3736 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3737 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3738 jj += nz; aa += nz; 3739 } 3740 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3741 3742 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3743 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3744 3745 ierr = PetscFree(colsub);CHKERRQ(ierr); 3746 3747 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3748 if (call == MAT_INITIAL_MATRIX) { 3749 *newmat = M; 3750 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3751 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3752 3753 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3754 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3755 3756 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3757 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3758 3759 if (iscol_local) { 3760 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3761 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3762 } 3763 } 3764 PetscFunctionReturn(0); 3765 } 3766 3767 /* 3768 Not great since it makes two copies of the submatrix, first an SeqAIJ 3769 in local and then by concatenating the local matrices the end result. 
3770 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3771 3772 Note: This requires a sequential iscol with all indices. 3773 */ 3774 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3775 { 3776 PetscErrorCode ierr; 3777 PetscMPIInt rank,size; 3778 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3779 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3780 Mat M,Mreuse; 3781 MatScalar *aa,*vwork; 3782 MPI_Comm comm; 3783 Mat_SeqAIJ *aij; 3784 PetscBool colflag,allcolumns=PETSC_FALSE; 3785 3786 PetscFunctionBegin; 3787 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3788 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3789 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3790 3791 /* Check for special case: each processor gets entire matrix columns */ 3792 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3793 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3794 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3795 3796 if (call == MAT_REUSE_MATRIX) { 3797 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3798 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3799 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3800 } else { 3801 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3802 } 3803 3804 /* 3805 m - number of local rows 3806 n - number of columns (same on all processors) 3807 rstart - first row in new global matrix generated 3808 */ 3809 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3810 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3811 if (call == MAT_INITIAL_MATRIX) { 3812 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3813 ii = aij->i; 3814 jj = aij->j; 3815 3816 /* 3817 
Determine the number of non-zeros in the diagonal and off-diagonal 3818 portions of the matrix in order to do correct preallocation 3819 */ 3820 3821 /* first get start and end of "diagonal" columns */ 3822 if (csize == PETSC_DECIDE) { 3823 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3824 if (mglobal == n) { /* square matrix */ 3825 nlocal = m; 3826 } else { 3827 nlocal = n/size + ((n % size) > rank); 3828 } 3829 } else { 3830 nlocal = csize; 3831 } 3832 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3833 rstart = rend - nlocal; 3834 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3835 3836 /* next, compute all the lengths */ 3837 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3838 olens = dlens + m; 3839 for (i=0; i<m; i++) { 3840 jend = ii[i+1] - ii[i]; 3841 olen = 0; 3842 dlen = 0; 3843 for (j=0; j<jend; j++) { 3844 if (*jj < rstart || *jj >= rend) olen++; 3845 else dlen++; 3846 jj++; 3847 } 3848 olens[i] = olen; 3849 dlens[i] = dlen; 3850 } 3851 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3852 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3853 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3854 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3855 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3856 ierr = PetscFree(dlens);CHKERRQ(ierr); 3857 } else { 3858 PetscInt ml,nl; 3859 3860 M = *newmat; 3861 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3862 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3863 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3864 /* 3865 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3866 rather than the slower MatSetValues(). 
3867 */ 3868 M->was_assembled = PETSC_TRUE; 3869 M->assembled = PETSC_FALSE; 3870 } 3871 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3872 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3873 ii = aij->i; 3874 jj = aij->j; 3875 aa = aij->a; 3876 for (i=0; i<m; i++) { 3877 row = rstart + i; 3878 nz = ii[i+1] - ii[i]; 3879 cwork = jj; jj += nz; 3880 vwork = aa; aa += nz; 3881 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3882 } 3883 3884 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3885 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3886 *newmat = M; 3887 3888 /* save submatrix used in processor for next request */ 3889 if (call == MAT_INITIAL_MATRIX) { 3890 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3891 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3892 } 3893 PetscFunctionReturn(0); 3894 } 3895 3896 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3897 { 3898 PetscInt m,cstart, cend,j,nnz,i,d; 3899 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3900 const PetscInt *JJ; 3901 PetscScalar *values; 3902 PetscErrorCode ierr; 3903 PetscBool nooffprocentries; 3904 3905 PetscFunctionBegin; 3906 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3907 3908 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3909 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3910 m = B->rmap->n; 3911 cstart = B->cmap->rstart; 3912 cend = B->cmap->rend; 3913 rstart = B->rmap->rstart; 3914 3915 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3916 3917 #if defined(PETSC_USE_DEBUG) 3918 for (i=0; i<m && Ii; i++) { 3919 nnz = Ii[i+1]- Ii[i]; 3920 JJ = J + Ii[i]; 3921 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3922 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row 
%D starts with negative column index",i,JJ[0]); 3923 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3924 } 3925 #endif 3926 3927 for (i=0; i<m && Ii; i++) { 3928 nnz = Ii[i+1]- Ii[i]; 3929 JJ = J + Ii[i]; 3930 nnz_max = PetscMax(nnz_max,nnz); 3931 d = 0; 3932 for (j=0; j<nnz; j++) { 3933 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3934 } 3935 d_nnz[i] = d; 3936 o_nnz[i] = nnz - d; 3937 } 3938 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3939 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3940 3941 if (v) values = (PetscScalar*)v; 3942 else { 3943 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3944 } 3945 3946 for (i=0; i<m && Ii; i++) { 3947 ii = i + rstart; 3948 nnz = Ii[i+1]- Ii[i]; 3949 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3950 } 3951 nooffprocentries = B->nooffprocentries; 3952 B->nooffprocentries = PETSC_TRUE; 3953 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3954 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3955 B->nooffprocentries = nooffprocentries; 3956 3957 if (!v) { 3958 ierr = PetscFree(values);CHKERRQ(ierr); 3959 } 3960 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3961 PetscFunctionReturn(0); 3962 } 3963 3964 /*@ 3965 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3966 (the default parallel PETSc format). 3967 3968 Collective on MPI_Comm 3969 3970 Input Parameters: 3971 + B - the matrix 3972 . i - the indices into j for the start of each local row (starts with zero) 3973 . 
j - the column indices for each local row (starts with zero) 3974 - v - optional values in the matrix 3975 3976 Level: developer 3977 3978 Notes: 3979 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3980 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3981 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3982 3983 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3984 3985 The format which is used for the sparse matrix input, is equivalent to a 3986 row-major ordering.. i.e for the following matrix, the input data expected is 3987 as shown 3988 3989 $ 1 0 0 3990 $ 2 0 3 P0 3991 $ ------- 3992 $ 4 5 6 P1 3993 $ 3994 $ Process0 [P0]: rows_owned=[0,1] 3995 $ i = {0,1,3} [size = nrow+1 = 2+1] 3996 $ j = {0,0,2} [size = 3] 3997 $ v = {1,2,3} [size = 3] 3998 $ 3999 $ Process1 [P1]: rows_owned=[2] 4000 $ i = {0,3} [size = nrow+1 = 1+1] 4001 $ j = {0,1,2} [size = 3] 4002 $ v = {4,5,6} [size = 3] 4003 4004 .keywords: matrix, aij, compressed row, sparse, parallel 4005 4006 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4007 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4008 @*/ 4009 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4010 { 4011 PetscErrorCode ierr; 4012 4013 PetscFunctionBegin; 4014 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4015 PetscFunctionReturn(0); 4016 } 4017 4018 /*@C 4019 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4020 (the default parallel PETSc format). 
For good matrix assembly performance 4021 the user should preallocate the matrix storage by setting the parameters 4022 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4023 performance can be increased by more than a factor of 50. 4024 4025 Collective on MPI_Comm 4026 4027 Input Parameters: 4028 + B - the matrix 4029 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4030 (same value is used for all local rows) 4031 . d_nnz - array containing the number of nonzeros in the various rows of the 4032 DIAGONAL portion of the local submatrix (possibly different for each row) 4033 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4034 The size of this array is equal to the number of local rows, i.e 'm'. 4035 For matrices that will be factored, you must leave room for (and set) 4036 the diagonal entry even if it is zero. 4037 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4038 submatrix (same value is used for all local rows). 4039 - o_nnz - array containing the number of nonzeros in the various rows of the 4040 OFF-DIAGONAL portion of the local submatrix (possibly different for 4041 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4042 structure. The size of this array is equal to the number 4043 of local rows, i.e 'm'. 4044 4045 If the *_nnz parameter is given then the *_nz parameter is ignored 4046 4047 The AIJ format (also called the Yale sparse matrix format or 4048 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4049 storage. The stored row and column indices begin with zero. 4050 See Users-Manual: ch_mat for details. 4051 4052 The parallel matrix is partitioned such that the first m0 rows belong to 4053 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4054 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
4055 4056 The DIAGONAL portion of the local submatrix of a processor can be defined 4057 as the submatrix which is obtained by extraction the part corresponding to 4058 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4059 first row that belongs to the processor, r2 is the last row belonging to 4060 the this processor, and c1-c2 is range of indices of the local part of a 4061 vector suitable for applying the matrix to. This is an mxn matrix. In the 4062 common case of a square matrix, the row and column ranges are the same and 4063 the DIAGONAL part is also square. The remaining portion of the local 4064 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4065 4066 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4067 4068 You can call MatGetInfo() to get information on how effective the preallocation was; 4069 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4070 You can also run with the option -info and look for messages with the string 4071 malloc in them to see if additional memory allocation was needed. 4072 4073 Example usage: 4074 4075 Consider the following 8x8 matrix with 34 non-zero values, that is 4076 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4077 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4078 as follows: 4079 4080 .vb 4081 1 2 0 | 0 3 0 | 0 4 4082 Proc0 0 5 6 | 7 0 0 | 8 0 4083 9 0 10 | 11 0 0 | 12 0 4084 ------------------------------------- 4085 13 0 14 | 15 16 17 | 0 0 4086 Proc1 0 18 0 | 19 20 21 | 0 0 4087 0 0 0 | 22 23 0 | 24 0 4088 ------------------------------------- 4089 Proc2 25 26 27 | 0 0 28 | 29 0 4090 30 0 0 | 31 32 33 | 0 34 4091 .ve 4092 4093 This can be represented as a collection of submatrices as: 4094 4095 .vb 4096 A B C 4097 D E F 4098 G H I 4099 .ve 4100 4101 Where the submatrices A,B,C are owned by proc0, D,E,F are 4102 owned by proc1, G,H,I are owned by proc2. 
4103 4104 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4105 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4106 The 'M','N' parameters are 8,8, and have the same values on all procs. 4107 4108 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4109 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4110 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4111 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4112 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4113 matrix, ans [DF] as another SeqAIJ matrix. 4114 4115 When d_nz, o_nz parameters are specified, d_nz storage elements are 4116 allocated for every row of the local diagonal submatrix, and o_nz 4117 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4118 One way to choose d_nz and o_nz is to use the max nonzerors per local 4119 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4120 In this case, the values of d_nz,o_nz are: 4121 .vb 4122 proc0 : dnz = 2, o_nz = 2 4123 proc1 : dnz = 3, o_nz = 2 4124 proc2 : dnz = 1, o_nz = 4 4125 .ve 4126 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4127 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4128 for proc3. i.e we are using 12+15+10=37 storage locations to store 4129 34 values. 4130 4131 When d_nnz, o_nnz parameters are specified, the storage is specified 4132 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4133 In the above case the values for d_nnz,o_nnz are: 4134 .vb 4135 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4136 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4137 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4138 .ve 4139 Here the space allocated is sum of all the above values i.e 34, and 4140 hence pre-allocation is perfect. 

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* argument 1 is checked as a Mat object and as having a type before anything is forwarded */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* all five arguments are forwarded unchanged to the method registered on B under the
     name "MatMPIAIJSetPreallocation_C".
     NOTE(review): presumably PetscTryMethod() does nothing when B's type has not
     composed that method - confirm against the PetscTryMethod() documentation */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
         CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.   j - column indices
-   a - matrix values

   Output Parameter:
.   mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        a =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        a =  {4,5,6}  [size = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* a NULL i[] skips this check; a non-NULL i[] must have i[0] == 0 */
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  /* any negative m is rejected, which is how PETSC_DECIDE is refused (see the error text) */
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* no block size is set here: this routine has no block-size arguments */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  /* the CSR arrays are handed on as given; per the Notes above they are copied, not kept */
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).
By setting these parameters accurately, 4234 performance can be increased by more than a factor of 50. 4235 4236 Collective on MPI_Comm 4237 4238 Input Parameters: 4239 + comm - MPI communicator 4240 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4241 This value should be the same as the local size used in creating the 4242 y vector for the matrix-vector product y = Ax. 4243 . n - This value should be the same as the local size used in creating the 4244 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4245 calculated if N is given) For square matrices n is almost always m. 4246 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4247 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4248 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4249 (same value is used for all local rows) 4250 . d_nnz - array containing the number of nonzeros in the various rows of the 4251 DIAGONAL portion of the local submatrix (possibly different for each row) 4252 or NULL, if d_nz is used to specify the nonzero structure. 4253 The size of this array is equal to the number of local rows, i.e 'm'. 4254 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4255 submatrix (same value is used for all local rows). 4256 - o_nnz - array containing the number of nonzeros in the various rows of the 4257 OFF-DIAGONAL portion of the local submatrix (possibly different for 4258 each row) or NULL, if o_nz is used to specify the nonzero 4259 structure. The size of this array is equal to the number 4260 of local rows, i.e 'm'. 4261 4262 Output Parameter: 4263 . A - the matrix 4264 4265 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4266 MatXXXXSetPreallocation() paradgm instead of this routine directly. 
4267 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4268 4269 Notes: 4270 If the *_nnz parameter is given then the *_nz parameter is ignored 4271 4272 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4273 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4274 storage requirements for this matrix. 4275 4276 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4277 processor than it must be used on all processors that share the object for 4278 that argument. 4279 4280 The user MUST specify either the local or global matrix dimensions 4281 (possibly both). 4282 4283 The parallel matrix is partitioned across processors such that the 4284 first m0 rows belong to process 0, the next m1 rows belong to 4285 process 1, the next m2 rows belong to process 2 etc.. where 4286 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4287 values corresponding to [m x N] submatrix. 4288 4289 The columns are logically partitioned with the n0 columns belonging 4290 to 0th partition, the next n1 columns belonging to the next 4291 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4292 4293 The DIAGONAL portion of the local submatrix on any given processor 4294 is the submatrix corresponding to the rows and columns m,n 4295 corresponding to the given processor. i.e diagonal matrix on 4296 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4297 etc. The remaining portion of the local submatrix [m x (N-n)] 4298 constitute the OFF-DIAGONAL portion. The example below better 4299 illustrates this concept. 4300 4301 For a square global matrix we define each processor's diagonal portion 4302 to be its local rows and the corresponding columns (a square submatrix); 4303 each processor's off-diagonal portion encompasses the remainder of the 4304 local matrix (a rectangular submatrix). 4305 4306 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
4307 4308 When calling this routine with a single process communicator, a matrix of 4309 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4310 type of communicator, use the construction mechanism 4311 .vb 4312 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4313 .ve 4314 4315 $ MatCreate(...,&A); 4316 $ MatSetType(A,MATMPIAIJ); 4317 $ MatSetSizes(A, m,n,M,N); 4318 $ MatMPIAIJSetPreallocation(A,...); 4319 4320 By default, this format uses inodes (identical nodes) when possible. 4321 We search for consecutive rows with the same nonzero structure, thereby 4322 reusing matrix information to achieve increased efficiency. 4323 4324 Options Database Keys: 4325 + -mat_no_inode - Do not use inodes 4326 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4327 4328 4329 4330 Example usage: 4331 4332 Consider the following 8x8 matrix with 34 non-zero values, that is 4333 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4334 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4335 as follows 4336 4337 .vb 4338 1 2 0 | 0 3 0 | 0 4 4339 Proc0 0 5 6 | 7 0 0 | 8 0 4340 9 0 10 | 11 0 0 | 12 0 4341 ------------------------------------- 4342 13 0 14 | 15 16 17 | 0 0 4343 Proc1 0 18 0 | 19 20 21 | 0 0 4344 0 0 0 | 22 23 0 | 24 0 4345 ------------------------------------- 4346 Proc2 25 26 27 | 0 0 28 | 29 0 4347 30 0 0 | 31 32 33 | 0 34 4348 .ve 4349 4350 This can be represented as a collection of submatrices as 4351 4352 .vb 4353 A B C 4354 D E F 4355 G H I 4356 .ve 4357 4358 Where the submatrices A,B,C are owned by proc0, D,E,F are 4359 owned by proc1, G,H,I are owned by proc2. 4360 4361 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4362 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4363 The 'M','N' parameters are 8,8, and have the same values on all procs. 

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
4398 4399 Level: intermediate 4400 4401 .keywords: matrix, aij, compressed row, sparse, parallel 4402 4403 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4404 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4405 @*/ 4406 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4407 { 4408 PetscErrorCode ierr; 4409 PetscMPIInt size; 4410 4411 PetscFunctionBegin; 4412 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4413 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4414 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4415 if (size > 1) { 4416 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4417 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4418 } else { 4419 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4420 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4421 } 4422 PetscFunctionReturn(0); 4423 } 4424 4425 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4426 { 4427 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4428 PetscBool flg; 4429 PetscErrorCode ierr; 4430 4431 PetscFunctionBegin; 4432 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4433 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4434 if (Ad) *Ad = a->A; 4435 if (Ao) *Ao = a->B; 4436 if (colmap) *colmap = a->garray; 4437 PetscFunctionReturn(0); 4438 } 4439 4440 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4441 { 4442 PetscErrorCode ierr; 4443 PetscInt m,N,i,rstart,nnz,Ii; 4444 PetscInt *indx; 4445 PetscScalar *values; 4446 4447 PetscFunctionBegin; 4448 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4449 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4450 PetscInt *dnz,*onz,sum,bs,cbs; 4451 
4452 if (n == PETSC_DECIDE) { 4453 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4454 } 4455 /* Check sum(n) = N */ 4456 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4457 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4458 4459 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4460 rstart -= m; 4461 4462 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4463 for (i=0; i<m; i++) { 4464 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4465 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4466 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4467 } 4468 4469 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4470 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4471 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4472 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4473 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4474 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4475 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4476 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4477 } 4478 4479 /* numeric phase */ 4480 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4481 for (i=0; i<m; i++) { 4482 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4483 Ii = i + rstart; 4484 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4485 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4486 } 4487 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4488 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4489 PetscFunctionReturn(0); 4490 } 4491 4492 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4493 { 4494 PetscErrorCode ierr; 4495 PetscMPIInt rank; 4496 PetscInt m,N,i,rstart,nnz; 4497 size_t len; 4498 
const PetscInt *indx; 4499 PetscViewer out; 4500 char *name; 4501 Mat B; 4502 const PetscScalar *values; 4503 4504 PetscFunctionBegin; 4505 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4506 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4507 /* Should this be the type of the diagonal block of A? */ 4508 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4509 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4510 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4511 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4512 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4513 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4514 for (i=0; i<m; i++) { 4515 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4516 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4517 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4518 } 4519 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4520 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4521 4522 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4523 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4524 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4525 sprintf(name,"%s.%d",outfile,rank); 4526 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4527 ierr = PetscFree(name);CHKERRQ(ierr); 4528 ierr = MatView(B,out);CHKERRQ(ierr); 4529 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4530 ierr = MatDestroy(&B);CHKERRQ(ierr); 4531 PetscFunctionReturn(0); 4532 } 4533 4534 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4535 { 4536 PetscErrorCode ierr; 4537 Mat_Merge_SeqsToMPI *merge; 4538 PetscContainer container; 4539 4540 PetscFunctionBegin; 4541 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4542 if (container) { 4543 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4544 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 
4545 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4546 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4547 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4548 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4549 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4550 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4551 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4552 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4553 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4554 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4555 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4556 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4557 ierr = PetscFree(merge);CHKERRQ(ierr); 4558 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4559 } 4560 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4561 PetscFunctionReturn(0); 4562 } 4563 4564 #include <../src/mat/utils/freespace.h> 4565 #include <petscbt.h> 4566 4567 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4568 { 4569 PetscErrorCode ierr; 4570 MPI_Comm comm; 4571 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4572 PetscMPIInt size,rank,taga,*len_s; 4573 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4574 PetscInt proc,m; 4575 PetscInt **buf_ri,**buf_rj; 4576 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4577 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4578 MPI_Request *s_waits,*r_waits; 4579 MPI_Status *status; 4580 MatScalar *aa=a->a; 4581 MatScalar **abuf_r,*ba_i; 4582 Mat_Merge_SeqsToMPI *merge; 4583 PetscContainer container; 4584 4585 PetscFunctionBegin; 4586 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4587 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4588 4589 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4590 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4591 4592 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4593 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4594 4595 bi = merge->bi; 4596 bj = merge->bj; 4597 buf_ri = merge->buf_ri; 4598 buf_rj = merge->buf_rj; 4599 4600 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4601 owners = merge->rowmap->range; 4602 len_s = merge->len_s; 4603 4604 /* send and recv matrix values */ 4605 /*-----------------------------*/ 4606 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4607 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4608 4609 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4610 for (proc=0,k=0; proc<size; proc++) { 4611 if (!len_s[proc]) continue; 4612 i = owners[proc]; 4613 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4614 k++; 4615 } 4616 4617 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4618 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4619 ierr = PetscFree(status);CHKERRQ(ierr); 4620 4621 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4622 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4623 4624 /* insert mat values of mpimat */ 4625 /*----------------------------*/ 4626 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4627 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4628 4629 for (k=0; k<merge->nrecv; k++) { 4630 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4631 nrows = *(buf_ri_k[k]); 4632 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4633 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4634 } 4635 4636 /* set values of ba */ 4637 m = merge->rowmap->n; 4638 for (i=0; i<m; i++) { 4639 arow = owners[rank] + i; 4640 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4641 bnzi = bi[i+1] - bi[i]; 4642 ierr = 
PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4643 4644 /* add local non-zero vals of this proc's seqmat into ba */ 4645 anzi = ai[arow+1] - ai[arow]; 4646 aj = a->j + ai[arow]; 4647 aa = a->a + ai[arow]; 4648 nextaj = 0; 4649 for (j=0; nextaj<anzi; j++) { 4650 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4651 ba_i[j] += aa[nextaj++]; 4652 } 4653 } 4654 4655 /* add received vals into ba */ 4656 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4657 /* i-th row */ 4658 if (i == *nextrow[k]) { 4659 anzi = *(nextai[k]+1) - *nextai[k]; 4660 aj = buf_rj[k] + *(nextai[k]); 4661 aa = abuf_r[k] + *(nextai[k]); 4662 nextaj = 0; 4663 for (j=0; nextaj<anzi; j++) { 4664 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4665 ba_i[j] += aa[nextaj++]; 4666 } 4667 } 4668 nextrow[k]++; nextai[k]++; 4669 } 4670 } 4671 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4672 } 4673 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4674 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4675 4676 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4677 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4678 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4679 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4680 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4681 PetscFunctionReturn(0); 4682 } 4683 4684 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4685 { 4686 PetscErrorCode ierr; 4687 Mat B_mpi; 4688 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4689 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4690 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4691 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4692 PetscInt len,proc,*dnz,*onz,bs,cbs; 4693 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4694 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4695 MPI_Request 
*si_waits,*sj_waits,*ri_waits,*rj_waits; 4696 MPI_Status *status; 4697 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4698 PetscBT lnkbt; 4699 Mat_Merge_SeqsToMPI *merge; 4700 PetscContainer container; 4701 4702 PetscFunctionBegin; 4703 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4704 4705 /* make sure it is a PETSc comm */ 4706 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4707 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4708 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4709 4710 ierr = PetscNew(&merge);CHKERRQ(ierr); 4711 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4712 4713 /* determine row ownership */ 4714 /*---------------------------------------------------------*/ 4715 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4716 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4717 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4718 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4719 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4720 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4721 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4722 4723 m = merge->rowmap->n; 4724 owners = merge->rowmap->range; 4725 4726 /* determine the number of messages to send, their lengths */ 4727 /*---------------------------------------------------------*/ 4728 len_s = merge->len_s; 4729 4730 len = 0; /* length of buf_si[] */ 4731 merge->nsend = 0; 4732 for (proc=0; proc<size; proc++) { 4733 len_si[proc] = 0; 4734 if (proc == rank) { 4735 len_s[proc] = 0; 4736 } else { 4737 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4738 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4739 } 4740 if (len_s[proc]) { 4741 merge->nsend++; 4742 nrows = 0; 4743 for (i=owners[proc]; i<owners[proc+1]; i++) { 4744 if (ai[i+1] > ai[i]) nrows++; 4745 } 4746 len_si[proc] = 2*(nrows+1); 4747 len += len_si[proc]; 4748 } 4749 } 4750 4751 
/* determine the number and length of messages to receive for ij-structure */ 4752 /*-------------------------------------------------------------------------*/ 4753 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4754 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4755 4756 /* post the Irecv of j-structure */ 4757 /*-------------------------------*/ 4758 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4759 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4760 4761 /* post the Isend of j-structure */ 4762 /*--------------------------------*/ 4763 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4764 4765 for (proc=0, k=0; proc<size; proc++) { 4766 if (!len_s[proc]) continue; 4767 i = owners[proc]; 4768 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4769 k++; 4770 } 4771 4772 /* receives and sends of j-structure are complete */ 4773 /*------------------------------------------------*/ 4774 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4775 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4776 4777 /* send and recv i-structure */ 4778 /*---------------------------*/ 4779 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4780 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4781 4782 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4783 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4784 for (proc=0,k=0; proc<size; proc++) { 4785 if (!len_s[proc]) continue; 4786 /* form outgoing message for i-structure: 4787 buf_si[0]: nrows to be sent 4788 [1:nrows]: row index (global) 4789 [nrows+1:2*nrows+1]: i-structure index 4790 */ 4791 /*-------------------------------------------*/ 
4792 nrows = len_si[proc]/2 - 1; 4793 buf_si_i = buf_si + nrows+1; 4794 buf_si[0] = nrows; 4795 buf_si_i[0] = 0; 4796 nrows = 0; 4797 for (i=owners[proc]; i<owners[proc+1]; i++) { 4798 anzi = ai[i+1] - ai[i]; 4799 if (anzi) { 4800 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4801 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4802 nrows++; 4803 } 4804 } 4805 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4806 k++; 4807 buf_si += len_si[proc]; 4808 } 4809 4810 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4811 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4812 4813 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4814 for (i=0; i<merge->nrecv; i++) { 4815 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4816 } 4817 4818 ierr = PetscFree(len_si);CHKERRQ(ierr); 4819 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4820 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4821 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4822 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4823 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4824 ierr = PetscFree(status);CHKERRQ(ierr); 4825 4826 /* compute a local seq matrix in each processor */ 4827 /*----------------------------------------------*/ 4828 /* allocate bi array and free space for accumulating nonzero column info */ 4829 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4830 bi[0] = 0; 4831 4832 /* create and initialize a linked list */ 4833 nlnk = N+1; 4834 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4835 4836 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4837 len = ai[owners[rank+1]] - ai[owners[rank]]; 4838 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4839 4840 current_space = free_space; 4841 4842 /* determine symbolic info for each 
local row */ 4843 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4844 4845 for (k=0; k<merge->nrecv; k++) { 4846 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4847 nrows = *buf_ri_k[k]; 4848 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4849 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4850 } 4851 4852 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4853 len = 0; 4854 for (i=0; i<m; i++) { 4855 bnzi = 0; 4856 /* add local non-zero cols of this proc's seqmat into lnk */ 4857 arow = owners[rank] + i; 4858 anzi = ai[arow+1] - ai[arow]; 4859 aj = a->j + ai[arow]; 4860 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4861 bnzi += nlnk; 4862 /* add received col data into lnk */ 4863 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4864 if (i == *nextrow[k]) { /* i-th row */ 4865 anzi = *(nextai[k]+1) - *nextai[k]; 4866 aj = buf_rj[k] + *nextai[k]; 4867 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4868 bnzi += nlnk; 4869 nextrow[k]++; nextai[k]++; 4870 } 4871 } 4872 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4873 4874 /* if free space is not available, make more free space */ 4875 if (current_space->local_remaining<bnzi) { 4876 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4877 nspacedouble++; 4878 } 4879 /* copy data into free space, then initialize lnk */ 4880 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4881 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4882 4883 current_space->array += bnzi; 4884 current_space->local_used += bnzi; 4885 current_space->local_remaining -= bnzi; 4886 4887 bi[i+1] = bi[i] + bnzi; 4888 } 4889 4890 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4891 4892 ierr = 
PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4893 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4894 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4895 4896 /* create symbolic parallel matrix B_mpi */ 4897 /*---------------------------------------*/ 4898 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4899 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4900 if (n==PETSC_DECIDE) { 4901 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4902 } else { 4903 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4904 } 4905 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4906 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4907 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4908 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4909 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4910 4911 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4912 B_mpi->assembled = PETSC_FALSE; 4913 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4914 merge->bi = bi; 4915 merge->bj = bj; 4916 merge->buf_ri = buf_ri; 4917 merge->buf_rj = buf_rj; 4918 merge->coi = NULL; 4919 merge->coj = NULL; 4920 merge->owners_co = NULL; 4921 4922 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4923 4924 /* attach the supporting struct to B_mpi for reuse */ 4925 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4926 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4927 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4928 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4929 *mpimat = B_mpi; 4930 4931 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4932 PetscFunctionReturn(0); 4933 } 4934 4935 /*@C 4936 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4937 matrices from each processor 4938 4939 
Collective on MPI_Comm 4940 4941 Input Parameters: 4942 + comm - the communicators the parallel matrix will live on 4943 . seqmat - the input sequential matrices 4944 . m - number of local rows (or PETSC_DECIDE) 4945 . n - number of local columns (or PETSC_DECIDE) 4946 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4947 4948 Output Parameter: 4949 . mpimat - the parallel matrix generated 4950 4951 Level: advanced 4952 4953 Notes: 4954 The dimensions of the sequential matrix in each processor MUST be the same. 4955 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4956 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4957 @*/ 4958 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4959 { 4960 PetscErrorCode ierr; 4961 PetscMPIInt size; 4962 4963 PetscFunctionBegin; 4964 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4965 if (size == 1) { 4966 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4967 if (scall == MAT_INITIAL_MATRIX) { 4968 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4969 } else { 4970 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4971 } 4972 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4973 PetscFunctionReturn(0); 4974 } 4975 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4976 if (scall == MAT_INITIAL_MATRIX) { 4977 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4978 } 4979 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4980 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4981 PetscFunctionReturn(0); 4982 } 4983 4984 /*@ 4985 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4986 mlocal rows and n columns. 
Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4987 with MatGetSize() 4988 4989 Not Collective 4990 4991 Input Parameters: 4992 + A - the matrix 4993 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4994 4995 Output Parameter: 4996 . A_loc - the local sequential matrix generated 4997 4998 Level: developer 4999 5000 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5001 5002 @*/ 5003 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5004 { 5005 PetscErrorCode ierr; 5006 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5007 Mat_SeqAIJ *mat,*a,*b; 5008 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5009 MatScalar *aa,*ba,*cam; 5010 PetscScalar *ca; 5011 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5012 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5013 PetscBool match; 5014 MPI_Comm comm; 5015 PetscMPIInt size; 5016 5017 PetscFunctionBegin; 5018 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5019 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5020 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5021 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5022 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5023 5024 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5025 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5026 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5027 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5028 aa = a->a; ba = b->a; 5029 if (scall == MAT_INITIAL_MATRIX) { 5030 if (size == 1) { 5031 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5032 PetscFunctionReturn(0); 5033 } 5034 5035 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5036 ci[0] = 0; 5037 for (i=0; i<am; i++) { 5038 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5039 } 5040 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5041 
ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5042 k = 0; 5043 for (i=0; i<am; i++) { 5044 ncols_o = bi[i+1] - bi[i]; 5045 ncols_d = ai[i+1] - ai[i]; 5046 /* off-diagonal portion of A */ 5047 for (jo=0; jo<ncols_o; jo++) { 5048 col = cmap[*bj]; 5049 if (col >= cstart) break; 5050 cj[k] = col; bj++; 5051 ca[k++] = *ba++; 5052 } 5053 /* diagonal portion of A */ 5054 for (j=0; j<ncols_d; j++) { 5055 cj[k] = cstart + *aj++; 5056 ca[k++] = *aa++; 5057 } 5058 /* off-diagonal portion of A */ 5059 for (j=jo; j<ncols_o; j++) { 5060 cj[k] = cmap[*bj++]; 5061 ca[k++] = *ba++; 5062 } 5063 } 5064 /* put together the new matrix */ 5065 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5066 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5067 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5068 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5069 mat->free_a = PETSC_TRUE; 5070 mat->free_ij = PETSC_TRUE; 5071 mat->nonew = 0; 5072 } else if (scall == MAT_REUSE_MATRIX) { 5073 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5074 ci = mat->i; cj = mat->j; cam = mat->a; 5075 for (i=0; i<am; i++) { 5076 /* off-diagonal portion of A */ 5077 ncols_o = bi[i+1] - bi[i]; 5078 for (jo=0; jo<ncols_o; jo++) { 5079 col = cmap[*bj]; 5080 if (col >= cstart) break; 5081 *cam++ = *ba++; bj++; 5082 } 5083 /* diagonal portion of A */ 5084 ncols_d = ai[i+1] - ai[i]; 5085 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5086 /* off-diagonal portion of A */ 5087 for (j=jo; j<ncols_o; j++) { 5088 *cam++ = *ba++; bj++; 5089 } 5090 } 5091 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5092 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5093 PetscFunctionReturn(0); 5094 } 5095 5096 /*@C 5097 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5098 5099 Not Collective 5100 5101 Input Parameters: 
5102 + A - the matrix 5103 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5104 - row, col - index sets of rows and columns to extract (or NULL) 5105 5106 Output Parameter: 5107 . A_loc - the local sequential matrix generated 5108 5109 Level: developer 5110 5111 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5112 5113 @*/ 5114 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5115 { 5116 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5117 PetscErrorCode ierr; 5118 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5119 IS isrowa,iscola; 5120 Mat *aloc; 5121 PetscBool match; 5122 5123 PetscFunctionBegin; 5124 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5125 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5126 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5127 if (!row) { 5128 start = A->rmap->rstart; end = A->rmap->rend; 5129 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5130 } else { 5131 isrowa = *row; 5132 } 5133 if (!col) { 5134 start = A->cmap->rstart; 5135 cmap = a->garray; 5136 nzA = a->A->cmap->n; 5137 nzB = a->B->cmap->n; 5138 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5139 ncols = 0; 5140 for (i=0; i<nzB; i++) { 5141 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5142 else break; 5143 } 5144 imark = i; 5145 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5146 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5147 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5148 } else { 5149 iscola = *col; 5150 } 5151 if (scall != MAT_INITIAL_MATRIX) { 5152 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5153 aloc[0] = *A_loc; 5154 } 5155 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5156 if (!col) { /* attach global id of condensed columns */ 5157 ierr = 
PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5158 } 5159 *A_loc = aloc[0]; 5160 ierr = PetscFree(aloc);CHKERRQ(ierr); 5161 if (!row) { 5162 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5163 } 5164 if (!col) { 5165 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5166 } 5167 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5168 PetscFunctionReturn(0); 5169 } 5170 5171 /*@C 5172 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5173 5174 Collective on Mat 5175 5176 Input Parameters: 5177 + A,B - the matrices in mpiaij format 5178 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5179 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5180 5181 Output Parameter: 5182 + rowb, colb - index sets of rows and columns of B to extract 5183 - B_seq - the sequential matrix generated 5184 5185 Level: developer 5186 5187 @*/ 5188 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5189 { 5190 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5191 PetscErrorCode ierr; 5192 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5193 IS isrowb,iscolb; 5194 Mat *bseq=NULL; 5195 5196 PetscFunctionBegin; 5197 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5198 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5199 } 5200 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5201 5202 if (scall == MAT_INITIAL_MATRIX) { 5203 start = A->cmap->rstart; 5204 cmap = a->garray; 5205 nzA = a->A->cmap->n; 5206 nzB = a->B->cmap->n; 5207 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5208 ncols = 0; 5209 for (i=0; i<nzB; i++) { /* row < local row index */ 5210 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5211 else break; 5212 } 5213 imark = i; 5214 for 
(i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5215 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5216 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5217 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5218 } else { 5219 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5220 isrowb = *rowb; iscolb = *colb; 5221 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5222 bseq[0] = *B_seq; 5223 } 5224 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5225 *B_seq = bseq[0]; 5226 ierr = PetscFree(bseq);CHKERRQ(ierr); 5227 if (!rowb) { 5228 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5229 } else { 5230 *rowb = isrowb; 5231 } 5232 if (!colb) { 5233 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5234 } else { 5235 *colb = iscolb; 5236 } 5237 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5238 PetscFunctionReturn(0); 5239 } 5240 5241 #include <petsc/private/vecscatterimpl.h> 5242 /* 5243 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5244 of the OFF-DIAGONAL portion of local A 5245 5246 Collective on Mat 5247 5248 Input Parameters: 5249 + A,B - the matrices in mpiaij format 5250 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5251 5252 Output Parameter: 5253 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5254 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5255 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5256 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5257 5258 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5259 for this matrix. This is not desirable.. 
5260 5261 Level: developer 5262 5263 */ 5264 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5265 { 5266 VecScatter_MPI_General *gen_to,*gen_from; 5267 PetscErrorCode ierr; 5268 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5269 Mat_SeqAIJ *b_oth; 5270 VecScatter ctx; 5271 MPI_Comm comm; 5272 PetscMPIInt *rprocs,*sprocs,tag,rank; 5273 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5274 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5275 PetscScalar *b_otha,*bufa,*bufA,*vals; 5276 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5277 MPI_Request *rwaits = NULL,*swaits = NULL; 5278 MPI_Status *sstatus,rstatus; 5279 PetscMPIInt jj,size; 5280 VecScatterType type; 5281 PetscBool mpi1; 5282 5283 PetscFunctionBegin; 5284 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5285 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5286 5287 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5288 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5289 } 5290 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5291 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5292 5293 if (size == 1) { 5294 startsj_s = NULL; 5295 bufa_ptr = NULL; 5296 *B_oth = NULL; 5297 PetscFunctionReturn(0); 5298 } 5299 5300 ctx = a->Mvctx; 5301 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5302 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5303 if (!mpi1) { 5304 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5305 thus create a->Mvctx_mpi1 */ 5306 if (!a->Mvctx_mpi1) { 5307 a->Mvctx_mpi1_flg = PETSC_TRUE; 5308 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5309 } 5310 ctx = a->Mvctx_mpi1; 5311 } 5312 tag = ((PetscObject)ctx)->tag; 5313 5314 gen_to = 
(VecScatter_MPI_General*)ctx->todata; 5315 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5316 nrecvs = gen_from->n; 5317 nsends = gen_to->n; 5318 5319 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5320 srow = gen_to->indices; /* local row index to be sent */ 5321 sstarts = gen_to->starts; 5322 sprocs = gen_to->procs; 5323 sstatus = gen_to->sstatus; 5324 sbs = gen_to->bs; 5325 rstarts = gen_from->starts; 5326 rprocs = gen_from->procs; 5327 rbs = gen_from->bs; 5328 5329 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5330 if (scall == MAT_INITIAL_MATRIX) { 5331 /* i-array */ 5332 /*---------*/ 5333 /* post receives */ 5334 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5335 for (i=0; i<nrecvs; i++) { 5336 rowlen = rvalues + rstarts[i]*rbs; 5337 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5338 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5339 } 5340 5341 /* pack the outgoing message */ 5342 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5343 5344 sstartsj[0] = 0; 5345 rstartsj[0] = 0; 5346 len = 0; /* total length of j or a array to be sent */ 5347 k = 0; 5348 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5349 for (i=0; i<nsends; i++) { 5350 rowlen = svalues + sstarts[i]*sbs; 5351 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5352 for (j=0; j<nrows; j++) { 5353 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5354 for (l=0; l<sbs; l++) { 5355 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5356 5357 rowlen[j*sbs+l] = ncols; 5358 5359 len += ncols; 5360 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5361 } 5362 k++; 5363 } 5364 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5365 5366 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and 
bufa */ 5367 } 5368 /* recvs and sends of i-array are completed */ 5369 i = nrecvs; 5370 while (i--) { 5371 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5372 } 5373 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5374 ierr = PetscFree(svalues);CHKERRQ(ierr); 5375 5376 /* allocate buffers for sending j and a arrays */ 5377 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5378 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5379 5380 /* create i-array of B_oth */ 5381 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5382 5383 b_othi[0] = 0; 5384 len = 0; /* total length of j or a array to be received */ 5385 k = 0; 5386 for (i=0; i<nrecvs; i++) { 5387 rowlen = rvalues + rstarts[i]*rbs; 5388 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5389 for (j=0; j<nrows; j++) { 5390 b_othi[k+1] = b_othi[k] + rowlen[j]; 5391 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5392 k++; 5393 } 5394 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5395 } 5396 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5397 5398 /* allocate space for j and a arrrays of B_oth */ 5399 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5400 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5401 5402 /* j-array */ 5403 /*---------*/ 5404 /* post receives of j-array */ 5405 for (i=0; i<nrecvs; i++) { 5406 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5407 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5408 } 5409 5410 /* pack the outgoing message j-array */ 5411 k = 0; 5412 for (i=0; i<nsends; i++) { 5413 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5414 bufJ = bufj+sstartsj[i]; 5415 for (j=0; j<nrows; j++) { 5416 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5417 for (ll=0; ll<sbs; ll++) { 5418 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5419 for (l=0; l<ncols; l++) 
{ 5420 *bufJ++ = cols[l]; 5421 } 5422 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5423 } 5424 } 5425 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5426 } 5427 5428 /* recvs and sends of j-array are completed */ 5429 i = nrecvs; 5430 while (i--) { 5431 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5432 } 5433 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5434 } else if (scall == MAT_REUSE_MATRIX) { 5435 sstartsj = *startsj_s; 5436 rstartsj = *startsj_r; 5437 bufa = *bufa_ptr; 5438 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5439 b_otha = b_oth->a; 5440 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5441 5442 /* a-array */ 5443 /*---------*/ 5444 /* post receives of a-array */ 5445 for (i=0; i<nrecvs; i++) { 5446 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5447 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5448 } 5449 5450 /* pack the outgoing message a-array */ 5451 k = 0; 5452 for (i=0; i<nsends; i++) { 5453 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5454 bufA = bufa+sstartsj[i]; 5455 for (j=0; j<nrows; j++) { 5456 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5457 for (ll=0; ll<sbs; ll++) { 5458 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5459 for (l=0; l<ncols; l++) { 5460 *bufA++ = vals[l]; 5461 } 5462 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5463 } 5464 } 5465 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5466 } 5467 /* recvs and sends of a-array are completed */ 5468 i = nrecvs; 5469 while (i--) { 5470 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5471 } 5472 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5473 
ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5474 5475 if (scall == MAT_INITIAL_MATRIX) { 5476 /* put together the new matrix */ 5477 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5478 5479 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5480 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5481 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5482 b_oth->free_a = PETSC_TRUE; 5483 b_oth->free_ij = PETSC_TRUE; 5484 b_oth->nonew = 0; 5485 5486 ierr = PetscFree(bufj);CHKERRQ(ierr); 5487 if (!startsj_s || !bufa_ptr) { 5488 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5489 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5490 } else { 5491 *startsj_s = sstartsj; 5492 *startsj_r = rstartsj; 5493 *bufa_ptr = bufa; 5494 } 5495 } 5496 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5497 PetscFunctionReturn(0); 5498 } 5499 5500 /*@C 5501 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5502 5503 Not Collective 5504 5505 Input Parameters: 5506 . A - The matrix in mpiaij format 5507 5508 Output Parameter: 5509 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5510 . 
colmap - A map from global column index to local index into lvec 5511 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5512 5513 Level: developer 5514 5515 @*/ 5516 #if defined(PETSC_USE_CTABLE) 5517 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5518 #else 5519 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5520 #endif 5521 { 5522 Mat_MPIAIJ *a; 5523 5524 PetscFunctionBegin; 5525 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5526 PetscValidPointer(lvec, 2); 5527 PetscValidPointer(colmap, 3); 5528 PetscValidPointer(multScatter, 4); 5529 a = (Mat_MPIAIJ*) A->data; 5530 if (lvec) *lvec = a->lvec; 5531 if (colmap) *colmap = a->colmap; 5532 if (multScatter) *multScatter = a->Mvctx; 5533 PetscFunctionReturn(0); 5534 } 5535 5536 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5537 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5538 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5539 #if defined(PETSC_HAVE_MKL_SPARSE) 5540 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5541 #endif 5542 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5543 #if defined(PETSC_HAVE_ELEMENTAL) 5544 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5545 #endif 5546 #if defined(PETSC_HAVE_HYPRE) 5547 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5548 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5549 #endif 5550 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5551 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5552 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5553 
5554 /* 5555 Computes (B'*A')' since computing B*A directly is untenable 5556 5557 n p p 5558 ( ) ( ) ( ) 5559 m ( A ) * n ( B ) = m ( C ) 5560 ( ) ( ) ( ) 5561 5562 */ 5563 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5564 { 5565 PetscErrorCode ierr; 5566 Mat At,Bt,Ct; 5567 5568 PetscFunctionBegin; 5569 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5570 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5571 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5572 ierr = MatDestroy(&At);CHKERRQ(ierr); 5573 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5574 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5575 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5576 PetscFunctionReturn(0); 5577 } 5578 5579 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5580 { 5581 PetscErrorCode ierr; 5582 PetscInt m=A->rmap->n,n=B->cmap->n; 5583 Mat Cmat; 5584 5585 PetscFunctionBegin; 5586 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5587 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5588 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5589 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5590 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5591 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5592 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5593 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5594 5595 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5596 5597 *C = Cmat; 5598 PetscFunctionReturn(0); 5599 } 5600 5601 /* ----------------------------------------------------------------*/ 5602 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5603 { 5604 PetscErrorCode ierr; 5605 5606 PetscFunctionBegin; 5607 if (scall == 
MAT_INITIAL_MATRIX) { 5608 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5609 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5610 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5611 } 5612 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5613 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5614 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5615 PetscFunctionReturn(0); 5616 } 5617 5618 /*MC 5619 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5620 5621 Options Database Keys: 5622 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5623 5624 Level: beginner 5625 5626 .seealso: MatCreateAIJ() 5627 M*/ 5628 5629 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5630 { 5631 Mat_MPIAIJ *b; 5632 PetscErrorCode ierr; 5633 PetscMPIInt size; 5634 5635 PetscFunctionBegin; 5636 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5637 5638 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5639 B->data = (void*)b; 5640 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5641 B->assembled = PETSC_FALSE; 5642 B->insertmode = NOT_SET_VALUES; 5643 b->size = size; 5644 5645 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5646 5647 /* build cache for off array entries formed */ 5648 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5649 5650 b->donotstash = PETSC_FALSE; 5651 b->colmap = 0; 5652 b->garray = 0; 5653 b->roworiented = PETSC_TRUE; 5654 5655 /* stuff used for matrix vector multiply */ 5656 b->lvec = NULL; 5657 b->Mvctx = NULL; 5658 5659 /* stuff for MatGetRow() */ 5660 b->rowindices = 0; 5661 b->rowvalues = 0; 5662 b->getrowactive = PETSC_FALSE; 5663 5664 /* flexible pointer used in CUSP/CUSPARSE classes */ 5665 b->spptr = NULL; 5666 5667 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5668 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5672 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5674 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5676 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 5677 #if defined(PETSC_HAVE_MKL_SPARSE) 5678 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5679 #endif 5680 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5681 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5682 #if defined(PETSC_HAVE_ELEMENTAL) 5683 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5684 #endif 5685 #if defined(PETSC_HAVE_HYPRE) 5686 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5687 #endif 5688 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5689 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5690 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5691 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5692 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5693 #if defined(PETSC_HAVE_HYPRE) 5694 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5695 #endif 5696 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5697 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5698 PetscFunctionReturn(0); 5699 } 5700 5701 /*@C 5702 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5703 and "off-diagonal" part of the matrix in CSR format. 5704 5705 Collective on MPI_Comm 5706 5707 Input Parameters: 5708 + comm - MPI communicator 5709 . m - number of local rows (Cannot be PETSC_DECIDE) 5710 . n - This value should be the same as the local size used in creating the 5711 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5712 calculated if N is given) For square matrices n is almost always m. 5713 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5714 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5715 . 
i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 5716 . j - column indices 5717 . a - matrix values 5718 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 5719 . oj - column indices 5720 - oa - matrix values 5721 5722 Output Parameter: 5723 . mat - the matrix 5724 5725 Level: advanced 5726 5727 Notes: 5728 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5729 must free the arrays once the matrix has been destroyed and not before. 5730 5731 The i and j indices are 0 based 5732 5733 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5734 5735 This sets local rows and cannot be used to set off-processor values. 5736 5737 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5738 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5739 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5740 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5741 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5742 communication if it is known that only local entries will be set. 
5743 5744 .keywords: matrix, aij, compressed row, sparse, parallel 5745 5746 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5747 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5748 @*/ 5749 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5750 { 5751 PetscErrorCode ierr; 5752 Mat_MPIAIJ *maij; 5753 5754 PetscFunctionBegin; 5755 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5756 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5757 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5758 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5759 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5760 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5761 maij = (Mat_MPIAIJ*) (*mat)->data; 5762 5763 (*mat)->preallocated = PETSC_TRUE; 5764 5765 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5766 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5767 5768 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5769 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5770 5771 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5772 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5773 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5774 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5775 5776 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5777 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5778 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5779 ierr = 
MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5780 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5781 PetscFunctionReturn(0); 5782 } 5783 5784 /* 5785 Special version for direct calls from Fortran 5786 */ 5787 #include <petsc/private/fortranimpl.h> 5788 5789 /* Change these macros so can be used in void function */ 5790 #undef CHKERRQ 5791 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5792 #undef SETERRQ2 5793 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5794 #undef SETERRQ3 5795 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5796 #undef SETERRQ 5797 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5798 5799 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5800 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5801 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5802 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5803 #else 5804 #endif 5805 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5806 { 5807 Mat mat = *mmat; 5808 PetscInt m = *mm, n = *mn; 5809 InsertMode addv = *maddv; 5810 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5811 PetscScalar value; 5812 PetscErrorCode ierr; 5813 5814 MatCheckPreallocated(mat,1); 5815 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5816 5817 #if defined(PETSC_USE_DEBUG) 5818 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5819 #endif 5820 { 5821 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5822 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5823 PetscBool roworiented = aij->roworiented; 5824 5825 /* Some Variables required in the macro */ 5826 Mat A = aij->A; 5827 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5828 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5829 MatScalar *aa 
= a->a; 5830 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5831 Mat B = aij->B; 5832 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5833 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5834 MatScalar *ba = b->a; 5835 5836 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5837 PetscInt nonew = a->nonew; 5838 MatScalar *ap1,*ap2; 5839 5840 PetscFunctionBegin; 5841 for (i=0; i<m; i++) { 5842 if (im[i] < 0) continue; 5843 #if defined(PETSC_USE_DEBUG) 5844 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5845 #endif 5846 if (im[i] >= rstart && im[i] < rend) { 5847 row = im[i] - rstart; 5848 lastcol1 = -1; 5849 rp1 = aj + ai[row]; 5850 ap1 = aa + ai[row]; 5851 rmax1 = aimax[row]; 5852 nrow1 = ailen[row]; 5853 low1 = 0; 5854 high1 = nrow1; 5855 lastcol2 = -1; 5856 rp2 = bj + bi[row]; 5857 ap2 = ba + bi[row]; 5858 rmax2 = bimax[row]; 5859 nrow2 = bilen[row]; 5860 low2 = 0; 5861 high2 = nrow2; 5862 5863 for (j=0; j<n; j++) { 5864 if (roworiented) value = v[i*n+j]; 5865 else value = v[i+j*m]; 5866 if (in[j] >= cstart && in[j] < cend) { 5867 col = in[j] - cstart; 5868 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5869 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5870 } else if (in[j] < 0) continue; 5871 #if defined(PETSC_USE_DEBUG) 5872 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 5873 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 5874 #endif 5875 else { 5876 if (mat->was_assembled) { 5877 if (!aij->colmap) { 5878 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5879 } 5880 #if defined(PETSC_USE_CTABLE) 5881 ierr = 
PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5882 col--; 5883 #else 5884 col = aij->colmap[in[j]] - 1; 5885 #endif 5886 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5887 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5888 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5889 col = in[j]; 5890 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5891 B = aij->B; 5892 b = (Mat_SeqAIJ*)B->data; 5893 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5894 rp2 = bj + bi[row]; 5895 ap2 = ba + bi[row]; 5896 rmax2 = bimax[row]; 5897 nrow2 = bilen[row]; 5898 low2 = 0; 5899 high2 = nrow2; 5900 bm = aij->B->rmap->n; 5901 ba = b->a; 5902 } 5903 } else col = in[j]; 5904 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5905 } 5906 } 5907 } else if (!aij->donotstash) { 5908 if (roworiented) { 5909 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5910 } else { 5911 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5912 } 5913 } 5914 } 5915 } 5916 PetscFunctionReturnVoid(); 5917 } 5918