1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/isimpl.h> 6 #include <petscblaslapack.h> 7 #include <petscsf.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 48 { 49 PetscErrorCode ierr; 50 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 51 52 PetscFunctionBegin; 53 if (mat->A) { 54 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 55 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 56 } 57 PetscFunctionReturn(0); 58 } 59 60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 61 { 62 PetscErrorCode ierr; 63 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 64 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 65 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 66 const PetscInt *ia,*ib; 67 const MatScalar *aa,*bb; 68 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 69 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 70 71 PetscFunctionBegin; 72 *keptrows = 0; 73 ia = a->i; 74 ib = b->i; 75 for (i=0; i<m; i++) { 76 na = ia[i+1] - ia[i]; 77 nb = ib[i+1] - ib[i]; 78 if (!na && !nb) { 79 cnt++; 80 goto ok1; 81 } 82 aa = a->a + ia[i]; 83 for (j=0; j<na; j++) { 84 if (aa[j] != 0.0) goto ok1; 85 } 86 bb = b->a + ib[i]; 87 for (j=0; j <nb; j++) { 88 if (bb[j] != 0.0) goto ok1; 89 } 90 cnt++; 91 ok1:; 92 } 93 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 94 if (!n0rows) PetscFunctionReturn(0); 95 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 96 cnt = 0; 97 for (i=0; i<m; i++) { 98 na = ia[i+1] - ia[i]; 99 nb = ib[i+1] - ib[i]; 100 if (!na && !nb) continue; 101 aa = a->a + ia[i]; 102 for (j=0; j<na;j++) { 103 if (aa[j] != 0.0) { 104 rows[cnt++] = rstart + i; 105 goto ok2; 106 } 107 } 108 bb = b->a + ib[i]; 109 for (j=0; j<nb; j++) { 110 if (bb[j] != 0.0) { 111 rows[cnt++] = rstart + i; 112 goto ok2; 113 } 
114 } 115 ok2:; 116 } 117 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 122 { 123 PetscErrorCode ierr; 124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 125 PetscBool cong; 126 127 PetscFunctionBegin; 128 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 129 if (Y->assembled && cong) { 130 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 131 } else { 132 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 133 } 134 PetscFunctionReturn(0); 135 } 136 137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 138 { 139 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 140 PetscErrorCode ierr; 141 PetscInt i,rstart,nrows,*rows; 142 143 PetscFunctionBegin; 144 *zrows = NULL; 145 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 146 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 147 for (i=0; i<nrows; i++) rows[i] += rstart; 148 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 153 { 154 PetscErrorCode ierr; 155 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 156 PetscInt i,n,*garray = aij->garray; 157 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 158 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 159 PetscReal *work; 160 161 PetscFunctionBegin; 162 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 163 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 164 if (type == NORM_2) { 165 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 166 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 167 } 168 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 169 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 170 } 171 } else if (type == NORM_1) { 172 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 173 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 174 } 175 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 176 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 177 } 178 } else if (type == NORM_INFINITY) { 179 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 180 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 181 } 182 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 183 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 184 } 185 186 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 187 if (type == NORM_INFINITY) { 188 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 189 } else { 190 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 191 } 192 ierr = PetscFree(work);CHKERRQ(ierr); 193 if (type == NORM_2) { 194 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 195 } 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 200 { 201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 202 IS sis,gis; 203 PetscErrorCode ierr; 204 const PetscInt *isis,*igis; 205 PetscInt n,*iis,nsis,ngis,rstart,i; 206 207 PetscFunctionBegin; 208 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 209 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 210 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 211 ierr = 
ISGetSize(sis,&nsis);CHKERRQ(ierr); 212 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 213 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 214 215 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 216 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 217 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 218 n = ngis + nsis; 219 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 220 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 221 for (i=0; i<n; i++) iis[i] += rstart; 222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 223 224 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 225 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 226 ierr = ISDestroy(&sis);CHKERRQ(ierr); 227 ierr = ISDestroy(&gis);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 /* 232 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 233 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 234 235 Only for square matrices 236 237 Used by a preconditioner, hence PETSC_EXTERN 238 */ 239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 240 { 241 PetscMPIInt rank,size; 242 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 243 PetscErrorCode ierr; 244 Mat mat; 245 Mat_SeqAIJ *gmata; 246 PetscMPIInt tag; 247 MPI_Status status; 248 PetscBool aij; 249 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 254 if (!rank) { 255 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 256 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 257 } 258 if (reuse == MAT_INITIAL_MATRIX) { 259 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 260 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 261 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 262 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 263 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 264 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 265 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 266 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 267 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 268 269 rowners[0] = 0; 270 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 271 rstart = rowners[rank]; 272 rend = rowners[rank+1]; 273 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 274 if (!rank) { 275 gmata = (Mat_SeqAIJ*) gmat->data; 276 /* send row lengths to all processors */ 277 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 278 for (i=1; i<size; i++) { 279 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 280 } 281 /* determine number diagonal and off-diagonal counts */ 282 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 283 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 284 jj = 0; 285 for (i=0; i<m; i++) { 286 for (j=0; j<dlens[i]; j++) { 287 if (gmata->j[jj] < rstart) ld[i]++; 288 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 289 jj++; 290 } 291 } 292 /* send column indices to other processes */ 293 for (i=1; i<size; i++) { 294 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 295 ierr = 
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 296 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 } 298 299 /* send numerical values to other processes */ 300 for (i=1; i<size; i++) { 301 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 302 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 303 } 304 gmataa = gmata->a; 305 gmataj = gmata->j; 306 307 } else { 308 /* receive row lengths */ 309 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* receive column indices */ 311 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 312 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 313 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 314 /* determine number diagonal and off-diagonal counts */ 315 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 316 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 317 jj = 0; 318 for (i=0; i<m; i++) { 319 for (j=0; j<dlens[i]; j++) { 320 if (gmataj[jj] < rstart) ld[i]++; 321 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 322 jj++; 323 } 324 } 325 /* receive numerical values */ 326 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 327 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 328 } 329 /* set preallocation */ 330 for (i=0; i<m; i++) { 331 dlens[i] -= olens[i]; 332 } 333 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 334 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 335 336 for (i=0; i<m; i++) { 337 dlens[i] += olens[i]; 338 } 339 cnt = 0; 340 for (i=0; i<m; i++) { 341 row = rstart + i; 342 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 343 cnt += dlens[i]; 344 } 345 if (rank) { 346 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 347 } 348 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 349 ierr = PetscFree(rowners);CHKERRQ(ierr); 350 351 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 352 353 *inmat = mat; 354 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 355 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 356 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 357 mat = *inmat; 358 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 359 if (!rank) { 360 /* send numerical values to other processes */ 361 gmata = (Mat_SeqAIJ*) gmat->data; 362 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 363 gmataa = gmata->a; 364 for (i=1; i<size; i++) { 365 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 366 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 367 } 368 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 369 } else { 370 /* receive numerical values from process 0*/ 371 nz = Ad->nz + Ao->nz; 372 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 373 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 374 } 375 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 376 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 377 ad = Ad->a; 378 ao = Ao->a; 379 if (mat->rmap->n) { 380 i = 0; 381 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 for (i=1; i<mat->rmap->n; i++) { 
385 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 386 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 387 } 388 i--; 389 if (mat->rmap->n) { 390 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 391 } 392 if (rank) { 393 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 394 } 395 } 396 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 398 PetscFunctionReturn(0); 399 } 400 401 /* 402 Local utility routine that creates a mapping from the global column 403 number to the local number in the off-diagonal part of the local 404 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 405 a slightly higher hash table cost; without it it is not scalable (each processor 406 has an order N integer array but is fast to acess. 407 */ 408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt n = aij->B->cmap->n,i; 413 414 PetscFunctionBegin; 415 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 416 #if defined(PETSC_USE_CTABLE) 417 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 418 for (i=0; i<n; i++) { 419 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 420 } 421 #else 422 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 423 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 424 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 425 #endif 426 PetscFunctionReturn(0); 427 } 428 429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 430 { \ 431 if (col <= lastcol1) low1 = 0; \ 432 else high1 = nrow1; \ 433 lastcol1 = col;\ 434 while (high1-low1 > 5) { \ 435 t = (low1+high1)/2; \ 436 if (rp1[t] > col) high1 = t; \ 437 else low1 = t; \ 438 } \ 439 for (_i=low1; _i<high1; _i++) { \ 440 if (rp1[_i] > col) break; \ 441 if (rp1[_i] == col) { \ 442 if (addv == ADD_VALUES) ap1[_i] += value; \ 443 else ap1[_i] = value; \ 444 goto a_noinsert; \ 445 } \ 446 } \ 447 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 448 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 449 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 450 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 451 N = nrow1++ - 1; a->nz++; high1++; \ 452 /* shift up all the later entries in this row */ \ 453 for (ii=N; ii>=_i; ii--) { \ 454 rp1[ii+1] = rp1[ii]; \ 455 ap1[ii+1] = ap1[ii]; \ 456 } \ 457 rp1[_i] = col; \ 458 ap1[_i] = value; \ 459 A->nonzerostate++;\ 460 a_noinsert: ; \ 461 ailen[row] = nrow1; \ 462 } 463 464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 465 { \ 466 if (col <= lastcol2) low2 = 0; \ 467 else high2 = nrow2; \ 468 lastcol2 = col; \ 469 while (high2-low2 > 5) { \ 470 t = (low2+high2)/2; \ 471 if (rp2[t] > col) high2 = t; \ 472 else low2 = t; \ 473 } \ 474 for (_i=low2; _i<high2; _i++) { \ 475 if (rp2[_i] > col) break; \ 476 if (rp2[_i] == col) { \ 477 if (addv == ADD_VALUES) ap2[_i] += value; \ 478 else ap2[_i] 
= value; \ 479 goto b_noinsert; \ 480 } \ 481 } \ 482 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 484 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 485 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 486 N = nrow2++ - 1; b->nz++; high2++; \ 487 /* shift up all the later entries in this row */ \ 488 for (ii=N; ii>=_i; ii--) { \ 489 rp2[ii+1] = rp2[ii]; \ 490 ap2[ii+1] = ap2[ii]; \ 491 } \ 492 rp2[_i] = col; \ 493 ap2[_i] = value; \ 494 B->nonzerostate++; \ 495 b_noinsert: ; \ 496 bilen[row] = nrow2; \ 497 } 498 499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 500 { 501 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 502 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 503 PetscErrorCode ierr; 504 PetscInt l,*garray = mat->garray,diag; 505 506 PetscFunctionBegin; 507 /* code only works for square matrices A */ 508 509 /* find size of row to the left of the diagonal part */ 510 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 511 row = row - diag; 512 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 513 if (garray[b->j[b->i[row]+l]] > diag) break; 514 } 515 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* diagonal part */ 518 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 519 520 /* right of diagonal part */ 521 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 526 { 527 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 528 PetscScalar value; 529 PetscErrorCode ierr; 530 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 531 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 532 PetscBool roworiented = aij->roworiented; 533 534 /* Some Variables required in the macro */ 535 Mat A = aij->A; 536 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 537 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 538 MatScalar *aa = a->a; 539 PetscBool ignorezeroentries = a->ignorezeroentries; 540 Mat B = aij->B; 541 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 542 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 543 MatScalar *ba = b->a; 544 545 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 546 PetscInt nonew; 547 MatScalar *ap1,*ap2; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (im[i] < 0) continue; 552 #if defined(PETSC_USE_DEBUG) 553 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 554 #endif 555 if (im[i] >= rstart && im[i] < rend) { 556 row = im[i] - rstart; 557 lastcol1 = -1; 558 rp1 = aj + ai[row]; 559 ap1 = aa + ai[row]; 560 rmax1 = aimax[row]; 561 nrow1 = ailen[row]; 562 low1 = 0; 563 high1 = nrow1; 564 lastcol2 = -1; 565 rp2 = bj + bi[row]; 566 ap2 = ba + bi[row]; 567 rmax2 = bimax[row]; 568 nrow2 = bilen[row]; 569 low2 = 0; 570 high2 = nrow2; 571 572 for (j=0; j<n; j++) { 573 if (roworiented) value = v[i*n+j]; 574 else value = v[i+j*m]; 
575 if (in[j] >= cstart && in[j] < cend) { 576 col = in[j] - cstart; 577 nonew = a->nonew; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 579 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 580 } else if (in[j] < 0) continue; 581 #if defined(PETSC_USE_DEBUG) 582 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 583 #endif 584 else { 585 if (mat->was_assembled) { 586 if (!aij->colmap) { 587 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 #if defined(PETSC_USE_CTABLE) 590 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 591 col--; 592 #else 593 col = aij->colmap[in[j]] - 1; 594 #endif 595 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 596 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 597 col = in[j]; 598 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 599 B = aij->B; 600 b = (Mat_SeqAIJ*)B->data; 601 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 602 rp2 = bj + bi[row]; 603 ap2 = ba + bi[row]; 604 rmax2 = bimax[row]; 605 nrow2 = bilen[row]; 606 low2 = 0; 607 high2 = nrow2; 608 bm = aij->B->rmap->n; 609 ba = b->a; 610 } else if (col < 0) { 611 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 612 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 613 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } 615 } else col = in[j]; 616 nonew = b->nonew; 617 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 618 } 619 } 620 } else { 621 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 622 if (!aij->donotstash) { 623 mat->assembled = PETSC_FALSE; 624 if (roworiented) { 625 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 626 } else { 627 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 628 } 629 } 630 } 631 } 632 PetscFunctionReturn(0); 633 } 634 635 /* 636 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 637 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 638 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
639 */ 640 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 641 { 642 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 643 Mat A = aij->A; /* diagonal part of the matrix */ 644 Mat B = aij->B; /* offdiagonal part of the matrix */ 645 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 646 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 647 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 648 PetscInt *ailen = a->ilen,*aj = a->j; 649 PetscInt *bilen = b->ilen,*bj = b->j; 650 PetscInt am = aij->A->rmap->n,j; 651 PetscInt diag_so_far = 0,dnz; 652 PetscInt offd_so_far = 0,onz; 653 654 PetscFunctionBegin; 655 /* Iterate over all rows of the matrix */ 656 for (j=0; j<am; j++) { 657 dnz = onz = 0; 658 /* Iterate over all non-zero columns of the current row */ 659 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 660 /* If column is in the diagonal */ 661 if (mat_j[col] >= cstart && mat_j[col] < cend) { 662 aj[diag_so_far++] = mat_j[col] - cstart; 663 dnz++; 664 } else { /* off-diagonal entries */ 665 bj[offd_so_far++] = mat_j[col]; 666 onz++; 667 } 668 } 669 ailen[j] = dnz; 670 bilen[j] = onz; 671 } 672 PetscFunctionReturn(0); 673 } 674 675 /* 676 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 677 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 678 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 679 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 680 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 681 */ 682 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 683 { 684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 685 Mat A = aij->A; /* diagonal part of the matrix */ 686 Mat B = aij->B; /* offdiagonal part of the matrix */ 687 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 688 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 689 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 690 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 691 PetscInt *ailen = a->ilen,*aj = a->j; 692 PetscInt *bilen = b->ilen,*bj = b->j; 693 PetscInt am = aij->A->rmap->n,j; 694 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 695 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 696 PetscScalar *aa = a->a,*ba = b->a; 697 698 PetscFunctionBegin; 699 /* Iterate over all rows of the matrix */ 700 for (j=0; j<am; j++) { 701 dnz_row = onz_row = 0; 702 rowstart_offd = full_offd_i[j]; 703 rowstart_diag = full_diag_i[j]; 704 /* Iterate over all non-zero columns of the current row */ 705 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 706 /* If column is in the diagonal */ 707 if (mat_j[col] >= cstart && mat_j[col] < cend) { 708 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 709 aa[rowstart_diag+dnz_row] = mat_a[col]; 710 dnz_row++; 711 } else { /* off-diagonal entries */ 712 bj[rowstart_offd+onz_row] = mat_j[col]; 713 ba[rowstart_offd+onz_row] = mat_a[col]; 714 onz_row++; 715 } 716 } 717 ailen[j] = dnz_row; 718 bilen[j] = onz_row; 719 } 720 PetscFunctionReturn(0); 721 } 722 723 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 724 { 725 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 726 PetscErrorCode ierr; 727 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 728 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 729 730 PetscFunctionBegin; 731 for (i=0; i<m; i++) { 732 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 733 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 734 if (idxm[i] >= rstart && idxm[i] < rend) { 735 row = idxm[i] - rstart; 736 for (j=0; j<n; j++) { 737 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 738 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 739 if (idxn[j] >= cstart && idxn[j] < cend) { 740 col = idxn[j] - cstart; 741 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 742 } else { 743 if (!aij->colmap) { 744 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 745 } 746 #if defined(PETSC_USE_CTABLE) 747 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 748 col--; 749 #else 750 col = aij->colmap[idxn[j]] - 1; 751 #endif 752 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 753 else { 754 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 755 } 756 } 757 } 758 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 759 } 760 PetscFunctionReturn(0); 761 } 762 763 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 764 765 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 766 { 767 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 768 PetscErrorCode ierr; 769 PetscInt nstash,reallocs; 770 771 PetscFunctionBegin; 772 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 773 774 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 775 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 776 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 777 PetscFunctionReturn(0); 778 } 779 780 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 781 { 782 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 783 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 784 PetscErrorCode ierr; 785 PetscMPIInt n; 786 PetscInt i,j,rstart,ncols,flg; 787 PetscInt *row,*col; 788 
PetscBool other_disassembled; 789 PetscScalar *val; 790 791 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 792 793 PetscFunctionBegin; 794 if (!aij->donotstash && !mat->nooffprocentries) { 795 while (1) { 796 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 797 if (!flg) break; 798 799 for (i=0; i<n; ) { 800 /* Now identify the consecutive vals belonging to the same row */ 801 for (j=i,rstart=row[j]; j<n; j++) { 802 if (row[j] != rstart) break; 803 } 804 if (j < n) ncols = j-i; 805 else ncols = n-i; 806 /* Now assemble all these values with a single function call */ 807 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 808 809 i = j; 810 } 811 } 812 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 813 } 814 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 815 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 816 817 /* determine if any processor has disassembled, if so we must 818 also disassemble ourselfs, in order that we may reassemble. */ 819 /* 820 if nonzero structure of submatrix B cannot change then we know that 821 no processor disassembled thus we can skip this stuff 822 */ 823 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 824 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 825 if (mat->was_assembled && !other_disassembled) { 826 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 827 } 828 } 829 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 830 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 831 } 832 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 833 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 834 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 835 836 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 837 838 aij->rowvalues = 0; 839 840 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 841 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 842 843 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 844 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 845 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 846 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 847 } 848 PetscFunctionReturn(0); 849 } 850 851 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 852 { 853 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 854 PetscErrorCode ierr; 855 856 PetscFunctionBegin; 857 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 858 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 859 PetscFunctionReturn(0); 860 } 861 862 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 863 { 864 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 865 PetscInt *lrows; 866 PetscInt r, len; 867 PetscBool cong; 868 PetscErrorCode ierr; 869 870 PetscFunctionBegin; 871 /* get locally owned rows */ 872 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 873 /* fix right hand side if needed */ 874 if (x && b) { 875 const PetscScalar *xx; 876 PetscScalar *bb; 877 878 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 879 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 880 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 881 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 882 ierr = 
VecRestoreArray(b, &bb);CHKERRQ(ierr); 883 } 884 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 885 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 886 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 887 if ((diag != 0.0) && cong) { 888 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 889 } else if (diag != 0.0) { 890 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 891 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 892 for (r = 0; r < len; ++r) { 893 const PetscInt row = lrows[r] + A->rmap->rstart; 894 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 895 } 896 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 897 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 898 } else { 899 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 900 } 901 ierr = PetscFree(lrows);CHKERRQ(ierr); 902 903 /* only change matrix nonzero state if pattern was allowed to be changed */ 904 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 905 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 906 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 907 } 908 PetscFunctionReturn(0); 909 } 910 911 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 912 { 913 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 914 PetscErrorCode ierr; 915 PetscMPIInt n = A->rmap->n; 916 PetscInt i,j,r,m,p = 0,len = 0; 917 PetscInt *lrows,*owners = A->rmap->range; 918 PetscSFNode *rrows; 919 PetscSF sf; 920 const PetscScalar *xx; 921 PetscScalar *bb,*mask; 922 Vec xmask,lmask; 923 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 924 const PetscInt *aj, *ii,*ridx; 925 PetscScalar *aa; 926 927 PetscFunctionBegin; 928 /* Create SF where leaves are input rows and roots are owned rows */ 929 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 930 for (r = 0; r < n; ++r) lrows[r] = -1; 931 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 932 for (r = 0; r < N; ++r) { 933 const PetscInt idx = rows[r]; 934 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 935 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 936 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 937 } 938 rrows[r].rank = p; 939 rrows[r].index = rows[r] - owners[p]; 940 } 941 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 942 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 943 /* Collect flags for rows to be zeroed */ 944 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 945 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 946 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 947 /* Compress and put in row numbers */ 948 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 949 /* zero diagonal part of matrix */ 950 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 951 /* handle off diagonal part of matrix */ 952 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 953 ierr 
= VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 954 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 955 for (i=0; i<len; i++) bb[lrows[i]] = 1; 956 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 957 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 958 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 960 if (x) { 961 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 962 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 964 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 965 } 966 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 967 /* remove zeroed rows of off diagonal matrix */ 968 ii = aij->i; 969 for (i=0; i<len; i++) { 970 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 971 } 972 /* loop over all elements of off process part of matrix zeroing removed columns*/ 973 if (aij->compressedrow.use) { 974 m = aij->compressedrow.nrows; 975 ii = aij->compressedrow.i; 976 ridx = aij->compressedrow.rindex; 977 for (i=0; i<m; i++) { 978 n = ii[i+1] - ii[i]; 979 aj = aij->j + ii[i]; 980 aa = aij->a + ii[i]; 981 982 for (j=0; j<n; j++) { 983 if (PetscAbsScalar(mask[*aj])) { 984 if (b) bb[*ridx] -= *aa*xx[*aj]; 985 *aa = 0.0; 986 } 987 aa++; 988 aj++; 989 } 990 ridx++; 991 } 992 } else { /* do not use compressed row format */ 993 m = l->B->rmap->n; 994 for (i=0; i<m; i++) { 995 n = ii[i+1] - ii[i]; 996 aj = aij->j + ii[i]; 997 aa = aij->a + ii[i]; 998 for (j=0; j<n; j++) { 999 if (PetscAbsScalar(mask[*aj])) { 1000 if (b) bb[i] -= *aa*xx[*aj]; 1001 *aa = 0.0; 1002 } 1003 aa++; 1004 aj++; 1005 } 1006 } 1007 } 1008 if (x) { 1009 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1010 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1011 } 1012 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1013 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1014 ierr = PetscFree(lrows);CHKERRQ(ierr); 1015 1016 /* only change matrix nonzero state if pattern was allowed to be changed */ 1017 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1018 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1019 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1020 } 1021 PetscFunctionReturn(0); 1022 } 1023 1024 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1025 { 1026 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1027 PetscErrorCode ierr; 1028 PetscInt nt; 1029 VecScatter Mvctx = a->Mvctx; 1030 1031 PetscFunctionBegin; 1032 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1033 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1034 1035 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1037 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1038 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1039 PetscFunctionReturn(0); 1040 } 1041 1042 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1043 { 1044 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1045 PetscErrorCode ierr; 1046 1047 PetscFunctionBegin; 1048 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatMultAdd_MPIAIJ(Mat 
A,Vec xx,Vec yy,Vec zz) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1055 PetscErrorCode ierr; 1056 VecScatter Mvctx = a->Mvctx; 1057 1058 PetscFunctionBegin; 1059 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1060 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1062 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1063 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1068 { 1069 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1070 PetscErrorCode ierr; 1071 PetscBool merged; 1072 1073 PetscFunctionBegin; 1074 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1075 /* do nondiagonal part */ 1076 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1077 if (!merged) { 1078 /* send it on its way */ 1079 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1080 /* do local part */ 1081 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1082 /* receive remote parts: note this assumes the values are not actually */ 1083 /* added in yy until the next line, */ 1084 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1085 } else { 1086 /* do local part */ 1087 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1088 /* send it on its way */ 1089 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1090 /* values actually were received in the Begin() but we need to call this nop */ 1091 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1092 } 1093 PetscFunctionReturn(0); 1094 } 1095 1096 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1097 { 1098 MPI_Comm comm; 1099 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1100 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1101 IS Me,Notme; 1102 PetscErrorCode ierr; 1103 PetscInt M,N,first,last,*notme,i; 1104 PetscMPIInt size; 1105 1106 PetscFunctionBegin; 1107 /* Easy test: symmetric diagonal block */ 1108 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1109 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1110 if (!*f) PetscFunctionReturn(0); 1111 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1112 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1113 if (size == 1) PetscFunctionReturn(0); 1114 1115 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1116 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1117 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1118 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1119 for (i=0; i<first; i++) notme[i] = i; 1120 for (i=last; i<M; i++) notme[i-last+first] = i; 1121 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1122 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1123 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1124 Aoff = Aoffs[0]; 1125 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1126 Boff = Boffs[0]; 1127 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1128 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1129 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1130 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1131 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1132 ierr = PetscFree(notme);CHKERRQ(ierr); 1133 PetscFunctionReturn(0); 1134 } 1135 1136 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1137 { 1138 PetscErrorCode ierr; 1139 1140 PetscFunctionBegin; 1141 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1146 { 1147 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 /* do nondiagonal part */ 1152 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1153 /* send it on its way */ 1154 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1155 /* do local part */ 1156 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1157 /* receive remote parts */ 1158 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 /* 1163 This only works correctly for square matrices where the subblock A->A is the 1164 diagonal block 1165 */ 1166 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1167 { 1168 PetscErrorCode ierr; 1169 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1170 1171 PetscFunctionBegin; 1172 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1173 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1174 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1179 { 1180 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1181 PetscErrorCode ierr; 1182 1183 PetscFunctionBegin; 1184 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1185 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1186 PetscFunctionReturn(0); 1187 } 1188 1189 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1190 { 1191 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1192 PetscErrorCode ierr; 1193 1194 PetscFunctionBegin; 1195 #if defined(PETSC_USE_LOG) 1196 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1197 #endif 1198 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1199 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1200 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1201 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1202 #if defined(PETSC_USE_CTABLE) 1203 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1204 #else 1205 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1206 #endif 1207 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1208 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1209 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1210 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1211 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1212 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1213 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1214 1215 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1224 #if defined(PETSC_HAVE_ELEMENTAL) 1225 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1226 #endif 1227 #if defined(PETSC_HAVE_HYPRE) 1228 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1230 #endif 1231 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1237 { 1238 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1239 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1240 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1241 PetscErrorCode ierr; 1242 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1243 int fd; 1244 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1245 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1246 PetscScalar *column_values; 1247 PetscInt message_count,flowcontrolcount; 1248 FILE *file; 1249 1250 PetscFunctionBegin; 1251 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1252 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1253 nz = A->nz + B->nz; 1254 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1255 if (!rank) { 1256 header[0] = MAT_FILE_CLASSID; 1257 header[1] = mat->rmap->N; 1258 header[2] = mat->cmap->N; 1259 1260 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1261 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1262 /* get largest number of rows any processor has */ 1263 rlen = mat->rmap->n; 1264 range = mat->rmap->range; 1265 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1266 } else { 1267 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1268 rlen = mat->rmap->n; 1269 } 1270 1271 /* 
load up the local row counts */ 1272 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1273 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1274 1275 /* store the row lengths to the file */ 1276 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1277 if (!rank) { 1278 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1279 for (i=1; i<size; i++) { 1280 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1281 rlen = range[i+1] - range[i]; 1282 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1283 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1284 } 1285 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1286 } else { 1287 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1288 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1289 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1290 } 1291 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1292 1293 /* load up the local column indices */ 1294 nzmax = nz; /* th processor needs space a largest processor needs */ 1295 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1296 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1297 cnt = 0; 1298 for (i=0; i<mat->rmap->n; i++) { 1299 for (j=B->i[i]; j<B->i[i+1]; j++) { 1300 if ((col = garray[B->j[j]]) > cstart) break; 1301 column_indices[cnt++] = col; 1302 } 1303 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1304 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1305 } 1306 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1307 1308 /* store the column indices to the file */ 1309 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1310 if (!rank) { 1311 MPI_Status status; 1312 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1313 for (i=1; i<size; i++) { 1314 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1315 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1316 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1317 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1318 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1319 } 1320 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1321 } else { 1322 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1323 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1324 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1325 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1326 } 1327 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1328 1329 /* load up the local column values */ 1330 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1331 cnt = 0; 1332 for (i=0; i<mat->rmap->n; i++) { 1333 for 
(j=B->i[i]; j<B->i[i+1]; j++) { 1334 if (garray[B->j[j]] > cstart) break; 1335 column_values[cnt++] = B->a[j]; 1336 } 1337 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1338 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1339 } 1340 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1341 1342 /* store the column values to the file */ 1343 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1344 if (!rank) { 1345 MPI_Status status; 1346 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1347 for (i=1; i<size; i++) { 1348 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1349 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1350 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1351 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1352 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1353 } 1354 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1355 } else { 1356 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1357 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1358 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1359 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1360 } 1361 ierr = PetscFree(column_values);CHKERRQ(ierr); 1362 1363 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1364 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #include <petscdraw.h> 1369 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1370 { 1371 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1372 PetscErrorCode ierr; 1373 PetscMPIInt rank = aij->rank,size = aij->size; 1374 PetscBool isdraw,iascii,isbinary; 1375 PetscViewer sviewer; 1376 PetscViewerFormat format; 1377 1378 PetscFunctionBegin; 1379 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1380 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1381 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1382 if (iascii) { 1383 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1384 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1385 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1386 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1387 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1388 for (i=0; i<(PetscInt)size; i++) { 1389 nmax = PetscMax(nmax,nz[i]); 1390 nmin = PetscMin(nmin,nz[i]); 1391 navg += nz[i]; 1392 } 1393 ierr = PetscFree(nz);CHKERRQ(ierr); 1394 navg = navg/size; 1395 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1396 PetscFunctionReturn(0); 1397 } 1398 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1399 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1400 MatInfo info; 1401 
PetscBool inodes; 1402 1403 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1404 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1405 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1406 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1407 if (!inodes) { 1408 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1409 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1410 } else { 1411 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1412 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1413 } 1414 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1415 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1416 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1418 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1420 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1421 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1422 PetscFunctionReturn(0); 1423 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1424 PetscInt inodecount,inodelimit,*inodes; 1425 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1426 if (inodes) { 1427 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1428 } else { 1429 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1430 } 1431 PetscFunctionReturn(0); 1432 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1433 PetscFunctionReturn(0); 1434 } 1435 } else if (isbinary) { 1436 if (size == 1) { 1437 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1438 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1439 } else { 1440 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1441 } 1442 PetscFunctionReturn(0); 1443 } else if (isdraw) { 1444 PetscDraw draw; 1445 PetscBool isnull; 1446 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1447 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1448 if (isnull) PetscFunctionReturn(0); 1449 } 1450 1451 { 1452 /* assemble the entire matrix onto first processor. 
*/ 1453 Mat A; 1454 Mat_SeqAIJ *Aloc; 1455 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1456 MatScalar *a; 1457 1458 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1459 if (!rank) { 1460 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1461 } else { 1462 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1463 } 1464 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1465 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1466 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1467 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1468 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1469 1470 /* copy over the A part */ 1471 Aloc = (Mat_SeqAIJ*)aij->A->data; 1472 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1473 row = mat->rmap->rstart; 1474 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1475 for (i=0; i<m; i++) { 1476 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1477 row++; 1478 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1479 } 1480 aj = Aloc->j; 1481 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1482 1483 /* copy over the B part */ 1484 Aloc = (Mat_SeqAIJ*)aij->B->data; 1485 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1486 row = mat->rmap->rstart; 1487 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1488 ct = cols; 1489 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1490 for (i=0; i<m; i++) { 1491 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1492 row++; 1493 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1494 } 1495 ierr = PetscFree(ct);CHKERRQ(ierr); 1496 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1497 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1498 /* 1499 Everyone has to call to draw the matrix since the graphics waits are 1500 synchronized across all processors that share the PetscDraw object 1501 */ 1502 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1503 if (!rank) { 1504 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1505 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1506 } 1507 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1508 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1509 ierr = MatDestroy(&A);CHKERRQ(ierr); 1510 } 1511 PetscFunctionReturn(0); 1512 } 1513 1514 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1515 { 1516 PetscErrorCode ierr; 1517 PetscBool iascii,isdraw,issocket,isbinary; 1518 1519 PetscFunctionBegin; 1520 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1521 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1522 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1523 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1524 if (iascii || isdraw || isbinary || issocket) { 1525 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1526 } 1527 PetscFunctionReturn(0); 1528 } 1529 1530 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1531 { 1532 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1533 PetscErrorCode ierr; 1534 Vec bb1 = 0; 1535 PetscBool hasop; 1536 
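  /* Each local relaxation sweep below scatters the current solution into mat->lvec, forms bb1 = bb - B*x using the off-diagonal block B, and then applies the sequential SOR of the diagonal block A to the updated right-hand side */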
1537 PetscFunctionBegin; 1538 if (flag == SOR_APPLY_UPPER) { 1539 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1544 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1545 } 1546 1547 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1548 if (flag & SOR_ZERO_INITIAL_GUESS) { 1549 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1550 its--; 1551 } 1552 1553 while (its--) { 1554 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1555 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1556 1557 /* update rhs: bb1 = bb - B*x */ 1558 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1559 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1560 1561 /* local sweep */ 1562 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1563 } 1564 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1565 if (flag & SOR_ZERO_INITIAL_GUESS) { 1566 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1567 its--; 1568 } 1569 while (its--) { 1570 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1571 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1572 1573 /* update rhs: bb1 = bb - B*x */ 1574 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1575 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1576 1577 /* local sweep */ 1578 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1579 } 1580 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1581 if (flag & SOR_ZERO_INITIAL_GUESS) { 1582 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1583 its--; 1584 } 1585 while (its--) { 1586 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1587 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1588 1589 /* update rhs: bb1 = bb - B*x */ 1590 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1591 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1592 1593 /* local sweep */ 1594 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1595 } 1596 } else if (flag & SOR_EISENSTAT) { 1597 Vec xx1; 1598 1599 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1600 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1601 1602 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1603 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1604 if (!mat->diag) { 1605 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1606 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1607 } 1608 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1609 if (hasop) { 1610 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1611 } else { 1612 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1613 } 1614 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1615 1616 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1617 1618 /* local sweep */ 1619 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1620 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1621 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1622 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1623 1624 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1625 1626 matin->factorerrortype = mat->A->factorerrortype; 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1631 { 1632 Mat aA,aB,Aperm; 1633 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1634 PetscScalar *aa,*ba; 1635 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1636 PetscSF rowsf,sf; 1637 IS parcolp = NULL; 1638 PetscBool done; 1639 PetscErrorCode ierr; 1640 1641 PetscFunctionBegin; 1642 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1643 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1644 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1645 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1646 1647 /* Invert row permutation to find out where my rows should go */ 1648 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1649 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1650 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1651 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1652 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1653 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1654 1655 /* Invert column permutation to find out where my columns should go */ 1656 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1657 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1658 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1659 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1660 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1661 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1662 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1663 1664 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1665 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1666 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1667 1668 /* Find out where my gcols should go */ 1669 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1670 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1671 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1672 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1673 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1674 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1675 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1676 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1677 1678 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1679 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1680 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1681 for (i=0; i<m; i++) { 1682 PetscInt row = rdest[i],rowner; 1683 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1684 for (j=ai[i]; j<ai[i+1]; j++) { 1685 PetscInt cowner,col = cdest[aj[j]]; 1686 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to 
eliminate this search */ 1687 if (rowner == cowner) dnnz[i]++; 1688 else onnz[i]++; 1689 } 1690 for (j=bi[i]; j<bi[i+1]; j++) { 1691 PetscInt cowner,col = gcdest[bj[j]]; 1692 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1693 if (rowner == cowner) dnnz[i]++; 1694 else onnz[i]++; 1695 } 1696 } 1697 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1698 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1699 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1700 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1701 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1702 1703 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1704 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1705 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1706 for (i=0; i<m; i++) { 1707 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1708 PetscInt j0,rowlen; 1709 rowlen = ai[i+1] - ai[i]; 1710 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1711 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1712 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1713 } 1714 rowlen = bi[i+1] - bi[i]; 1715 for (j0=j=0; j<rowlen; j0=j) { 1716 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1717 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1718 } 1719 } 1720 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1721 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1722 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1723 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1724 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1725 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1726 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1727 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1728 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1729 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1730 *B = Aperm; 1731 PetscFunctionReturn(0); 1732 } 1733 1734 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1735 { 1736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1737 PetscErrorCode ierr; 1738 1739 PetscFunctionBegin; 1740 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1741 if (ghosts) *ghosts = aij->garray; 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1746 { 1747 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1748 Mat A = mat->A,B = mat->B; 1749 PetscErrorCode ierr; 1750 PetscReal isend[5],irecv[5]; 1751 1752 PetscFunctionBegin; 1753 info->block_size = 1.0; 1754 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1755 1756 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1757 isend[3] = info->memory; isend[4] = info->mallocs; 1758 1759 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1760 1761 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1762 isend[3] += info->memory; isend[4] += info->mallocs; 1763 if (flag == MAT_LOCAL) { 1764 info->nz_used = isend[0]; 1765 info->nz_allocated = isend[1]; 1766 info->nz_unneeded = isend[2]; 1767 info->memory = isend[3]; 1768 info->mallocs = 
isend[4]; 1769 } else if (flag == MAT_GLOBAL_MAX) { 1770 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1771 1772 info->nz_used = irecv[0]; 1773 info->nz_allocated = irecv[1]; 1774 info->nz_unneeded = irecv[2]; 1775 info->memory = irecv[3]; 1776 info->mallocs = irecv[4]; 1777 } else if (flag == MAT_GLOBAL_SUM) { 1778 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1779 1780 info->nz_used = irecv[0]; 1781 info->nz_allocated = irecv[1]; 1782 info->nz_unneeded = irecv[2]; 1783 info->memory = irecv[3]; 1784 info->mallocs = irecv[4]; 1785 } 1786 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1787 info->fill_ratio_needed = 0; 1788 info->factor_mallocs = 0; 1789 PetscFunctionReturn(0); 1790 } 1791 1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1793 { 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1795 PetscErrorCode ierr; 1796 1797 PetscFunctionBegin; 1798 switch (op) { 1799 case MAT_NEW_NONZERO_LOCATIONS: 1800 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1801 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1802 case MAT_KEEP_NONZERO_PATTERN: 1803 case MAT_NEW_NONZERO_LOCATION_ERR: 1804 case MAT_USE_INODES: 1805 case MAT_IGNORE_ZERO_ENTRIES: 1806 MatCheckPreallocated(A,1); 1807 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1808 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1809 break; 1810 case MAT_ROW_ORIENTED: 1811 MatCheckPreallocated(A,1); 1812 a->roworiented = flg; 1813 1814 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1815 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1816 break; 1817 case MAT_NEW_DIAGONALS: 1818 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1819 break; 1820 case MAT_IGNORE_OFF_PROC_ENTRIES: 1821 a->donotstash = flg; 1822 break; 1823 case MAT_SPD: 1824 A->spd_set = PETSC_TRUE; 1825 A->spd = flg; 1826 if (flg) { 1827 A->symmetric = PETSC_TRUE; 1828 A->structurally_symmetric = PETSC_TRUE; 1829 A->symmetric_set = PETSC_TRUE; 1830 A->structurally_symmetric_set = PETSC_TRUE; 1831 } 1832 break; 1833 case MAT_SYMMETRIC: 1834 MatCheckPreallocated(A,1); 1835 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1836 break; 1837 case MAT_STRUCTURALLY_SYMMETRIC: 1838 MatCheckPreallocated(A,1); 1839 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1840 break; 1841 case MAT_HERMITIAN: 1842 MatCheckPreallocated(A,1); 1843 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1844 break; 1845 case MAT_SYMMETRY_ETERNAL: 1846 MatCheckPreallocated(A,1); 1847 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1848 break; 1849 case MAT_SUBMAT_SINGLEIS: 1850 A->submat_singleis = flg; 1851 break; 1852 case MAT_STRUCTURE_ONLY: 1853 /* The option is handled directly by MatSetOption() */ 1854 break; 1855 default: 1856 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1857 } 1858 PetscFunctionReturn(0); 1859 } 1860 1861 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1862 { 1863 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1864 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1865 PetscErrorCode ierr; 1866 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1867 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1868 PetscInt *cmap,*idx_p; 1869 1870 PetscFunctionBegin; 1871 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1872 mat->getrowactive = PETSC_TRUE; 1873 1874 if (!mat->rowvalues && 
(idx || v)) { 1875 /* 1876 allocate enough space to hold information from the longest row. 1877 */ 1878 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1879 PetscInt max = 1,tmp; 1880 for (i=0; i<matin->rmap->n; i++) { 1881 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1882 if (max < tmp) max = tmp; 1883 } 1884 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1885 } 1886 1887 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1888 lrow = row - rstart; 1889 1890 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1891 if (!v) {pvA = 0; pvB = 0;} 1892 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1893 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1894 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1895 nztot = nzA + nzB; 1896 1897 cmap = mat->garray; 1898 if (v || idx) { 1899 if (nztot) { 1900 /* Sort by increasing column numbers, assuming A and B already sorted */ 1901 PetscInt imark = -1; 1902 if (v) { 1903 *v = v_p = mat->rowvalues; 1904 for (i=0; i<nzB; i++) { 1905 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1906 else break; 1907 } 1908 imark = i; 1909 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1910 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1911 } 1912 if (idx) { 1913 *idx = idx_p = mat->rowindices; 1914 if (imark > -1) { 1915 for (i=0; i<imark; i++) { 1916 idx_p[i] = cmap[cworkB[i]]; 1917 } 1918 } else { 1919 for (i=0; i<nzB; i++) { 1920 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1921 else break; 1922 } 1923 imark = i; 1924 } 1925 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1926 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1927 } 1928 } else { 1929 if (idx) *idx = 0; 1930 if (v) *v = 0; 1931 } 1932 } 1933 *nz = nztot; 1934 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1935 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1936 PetscFunctionReturn(0); 1937 } 1938 1939 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1940 { 1941 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1942 1943 PetscFunctionBegin; 1944 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1945 aij->getrowactive = PETSC_FALSE; 1946 PetscFunctionReturn(0); 1947 } 1948 1949 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1950 { 1951 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1952 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1953 PetscErrorCode ierr; 1954 PetscInt i,j,cstart = mat->cmap->rstart; 1955 PetscReal sum = 0.0; 1956 MatScalar *v; 1957 1958 PetscFunctionBegin; 1959 if (aij->size == 1) { 1960 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1961 } else { 1962 if (type == NORM_FROBENIUS) { 1963 v = amat->a; 1964 for (i=0; i<amat->nz; i++) { 1965 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1966 } 1967 v = bmat->a; 1968 for (i=0; i<bmat->nz; i++) { 1969 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1970 } 1971 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1972 *norm = PetscSqrtReal(*norm); 1973 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1974 } else if (type == NORM_1) { /* max column norm */ 1975 PetscReal *tmp,*tmp2; 1976 PetscInt *jj,*garray = aij->garray; 1977 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1978 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1979 *norm = 0.0; 1980 v = amat->a; jj = amat->j; 1981 for (j=0; j<amat->nz; j++) { 1982 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1983 } 1984 v = bmat->a; jj = bmat->j; 1985 for (j=0; j<bmat->nz; j++) { 1986 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1987 } 1988 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1989 for (j=0; j<mat->cmap->N; j++) { 1990 if (tmp2[j] > *norm) *norm = tmp2[j]; 1991 } 1992 ierr = PetscFree(tmp);CHKERRQ(ierr); 1993 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1994 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1995 } else if (type == NORM_INFINITY) { /* max row norm */ 1996 PetscReal ntemp = 0.0; 1997 for (j=0; j<aij->A->rmap->n; j++) { 1998 v = amat->a + amat->i[j]; 1999 sum = 0.0; 2000 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2001 sum += PetscAbsScalar(*v); v++; 2002 } 2003 v = bmat->a + bmat->i[j]; 2004 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2005 sum += PetscAbsScalar(*v); v++; 2006 } 2007 if (sum > ntemp) ntemp = sum; 2008 } 2009 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2010 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2011 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2012 } 2013 PetscFunctionReturn(0); 2014 } 2015 2016 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2017 { 2018 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2019 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2020 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol; 2021 PetscErrorCode ierr; 2022 Mat B,A_diag,*B_diag; 2023 MatScalar *array; 2024 2025 PetscFunctionBegin; 2026 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2027 ai = Aloc->i; aj = Aloc->j; 2028 bi = Bloc->i; bj = Bloc->j; 2029 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2030 PetscInt *d_nnz,*g_nnz,*o_nnz; 2031 PetscSFNode *oloc; 2032 PETSC_UNUSED PetscSF sf; 2033 2034 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2035 /* compute d_nnz for preallocation */ 2036 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2037 for (i=0; i<ai[ma]; i++) { 2038 d_nnz[aj[i]]++; 2039 } 2040 /* compute local off-diagonal contributions */ 2041 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2042 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2043 /* map those to global */ 2044 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2045 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2046 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2047 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2048 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2049 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2050 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2051 2052 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2053 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2054 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2055 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2056 ierr = 
MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2057 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2058 } else { 2059 B = *matout; 2060 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2061 } 2062 2063 b = (Mat_MPIAIJ*)B->data; 2064 A_diag = a->A; 2065 B_diag = &b->A; 2066 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2067 A_diag_ncol = A_diag->cmap->N; 2068 B_diag_ilen = sub_B_diag->ilen; 2069 B_diag_i = sub_B_diag->i; 2070 2071 /* Set ilen for diagonal of B */ 2072 for (i=0; i<A_diag_ncol; i++) { 2073 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2074 } 2075 2076 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2077 very quickly (=without using MatSetValues), because all writes are local. */ 2078 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2079 2080 /* copy over the B part */ 2081 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2082 array = Bloc->a; 2083 row = A->rmap->rstart; 2084 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2085 cols_tmp = cols; 2086 for (i=0; i<mb; i++) { 2087 ncol = bi[i+1]-bi[i]; 2088 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2089 row++; 2090 array += ncol; cols_tmp += ncol; 2091 } 2092 ierr = PetscFree(cols);CHKERRQ(ierr); 2093 2094 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2095 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2096 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2097 *matout = B; 2098 } else { 2099 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2100 } 2101 PetscFunctionReturn(0); 2102 } 2103 2104 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2105 { 2106 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2107 Mat a = aij->A,b = aij->B; 2108 PetscErrorCode ierr; 2109 PetscInt s1,s2,s3; 2110 2111 PetscFunctionBegin; 2112 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2113 if (rr) { 2114 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2115 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2116 /* Overlap communication with computation. 
*/ 2117 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2118 } 2119 if (ll) { 2120 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2121 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2122 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2123 } 2124 /* scale the diagonal block */ 2125 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2126 2127 if (rr) { 2128 /* Do a scatter end and then right scale the off-diagonal block */ 2129 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2130 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2131 } 2132 PetscFunctionReturn(0); 2133 } 2134 2135 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2136 { 2137 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2138 PetscErrorCode ierr; 2139 2140 PetscFunctionBegin; 2141 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2146 { 2147 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2148 Mat a,b,c,d; 2149 PetscBool flg; 2150 PetscErrorCode ierr; 2151 2152 PetscFunctionBegin; 2153 a = matA->A; b = matA->B; 2154 c = matB->A; d = matB->B; 2155 2156 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2157 if (flg) { 2158 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2159 } 2160 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2161 PetscFunctionReturn(0); 2162 } 2163 2164 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2165 { 2166 PetscErrorCode ierr; 2167 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2168 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2169 2170 PetscFunctionBegin; 2171 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2172 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2173 /* because of the column compression in the off-processor part of the matrix a->B, 2174 the number of columns in a->B and b->B may be different, hence we cannot call 2175 the MatCopy() directly on the two parts. If need be, we can provide a more 2176 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2177 then copying the submatrices */ 2178 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2179 } else { 2180 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2181 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2182 } 2183 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2184 PetscFunctionReturn(0); 2185 } 2186 2187 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2188 { 2189 PetscErrorCode ierr; 2190 2191 PetscFunctionBegin; 2192 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2193 PetscFunctionReturn(0); 2194 } 2195 2196 /* 2197 Computes the number of nonzeros per row needed for preallocation when X and Y 2198 have different nonzero structure. 
2199 */ 2200 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2201 { 2202 PetscInt i,j,k,nzx,nzy; 2203 2204 PetscFunctionBegin; 2205 /* Set the number of nonzeros in the new matrix */ 2206 for (i=0; i<m; i++) { 2207 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2208 nzx = xi[i+1] - xi[i]; 2209 nzy = yi[i+1] - yi[i]; 2210 nnz[i] = 0; 2211 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2212 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2213 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2214 nnz[i]++; 2215 } 2216 for (; k<nzy; k++) nnz[i]++; 2217 } 2218 PetscFunctionReturn(0); 2219 } 2220 2221 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2222 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2223 { 2224 PetscErrorCode ierr; 2225 PetscInt m = Y->rmap->N; 2226 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2227 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2228 2229 PetscFunctionBegin; 2230 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2231 PetscFunctionReturn(0); 2232 } 2233 2234 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2235 { 2236 PetscErrorCode ierr; 2237 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2238 PetscBLASInt bnz,one=1; 2239 Mat_SeqAIJ *x,*y; 2240 2241 PetscFunctionBegin; 2242 if (str == SAME_NONZERO_PATTERN) { 2243 PetscScalar alpha = a; 2244 x = (Mat_SeqAIJ*)xx->A->data; 2245 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2246 y = (Mat_SeqAIJ*)yy->A->data; 2247 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2248 x = (Mat_SeqAIJ*)xx->B->data; 2249 y = (Mat_SeqAIJ*)yy->B->data; 2250 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2251 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2252 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2253 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2254 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2255 } else { 2256 Mat B; 2257 PetscInt *nnz_d,*nnz_o; 2258 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2259 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2260 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2261 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2262 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2263 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2264 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2265 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2266 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2267 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2268 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2269 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2270 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2271 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2272 } 2273 PetscFunctionReturn(0); 2274 } 2275 2276 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2277 2278 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2279 { 2280 #if defined(PETSC_USE_COMPLEX) 2281 PetscErrorCode ierr; 2282 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data;
2283
2284   PetscFunctionBegin;
2285   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2286   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2287 #else
2288   PetscFunctionBegin;
2289 #endif
2290   PetscFunctionReturn(0);
2291 }
2292
2293 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2294 {
2295   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2296   PetscErrorCode ierr;
2297
2298   PetscFunctionBegin;
2299   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2300   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303
2304 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2305 {
2306   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2307   PetscErrorCode ierr;
2308
2309   PetscFunctionBegin;
2310   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2311   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2312   PetscFunctionReturn(0);
2313 }
2314
2315 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2316 {
2317   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2318   PetscErrorCode ierr;
2319   PetscInt i,*idxb = 0;
2320   PetscScalar *va,*vb;
2321   Vec vtmp;
2322
2323   PetscFunctionBegin;
2324   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2325   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2326   if (idx) {
2327     for (i=0; i<A->rmap->n; i++) {
2328       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2329     }
2330   }
2331
2332   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2333   if (idx) {
2334     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2335   }
2336   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2337   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2338
2339   for (i=0; i<A->rmap->n; i++) {
2340     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2341       va[i] = vb[i];
2342       if (idx) idx[i] = a->garray[idxb[i]];
2343     }
2344   }
2345
2346   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2347   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2348   ierr = PetscFree(idxb);CHKERRQ(ierr);
2349   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2350   PetscFunctionReturn(0);
2351 }
2352
2353 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2354 {
2355   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2356   PetscErrorCode ierr;
2357   PetscInt i,*idxb = 0;
2358   PetscScalar *va,*vb;
2359   Vec vtmp;
2360
2361   PetscFunctionBegin;
2362   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2363   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2364   if (idx) {
2365     for (i=0; i<A->rmap->n; i++) {
2366       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2367     }
2368   }
2369
2370   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2371   if (idx) {
2372     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2373   }
2374   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2375   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2376
2377   for (i=0; i<A->rmap->n; i++) {
2378     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2379       va[i] = vb[i];
2380       if (idx) idx[i] = a->garray[idxb[i]];
2381     }
2382   }
2383
2384   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2385   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2386   ierr = PetscFree(idxb);CHKERRQ(ierr);
2387   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2388   PetscFunctionReturn(0);
2389 }
2390
2391 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2392 {
2393   Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2394   PetscInt n = A->rmap->n;
2395   PetscInt cstart = A->cmap->rstart;
2396   PetscInt *cmap = mat->garray;
2397   PetscInt *diagIdx, *offdiagIdx;
2398   Vec diagV, offdiagV;
2399   PetscScalar *a, *diagA, *offdiagA;
2400   PetscInt r;
2401   PetscErrorCode ierr;
2402
2403   PetscFunctionBegin;
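  /* Compute the row minima of the diagonal (A) and off-diagonal (B) blocks into sequential work vectors, then merge the two results row by row, mapping off-diagonal locations back to global column indices through garray */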
2404   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2405   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2406   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2407   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2408   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2409   ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2410   ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2411   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2412   for (r = 0; r < n; ++r) {
2413     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2414       a[r] = diagA[r];
2415       idx[r] = cstart + diagIdx[r];
2416     } else {
2417       a[r] = offdiagA[r];
2418       idx[r] = cmap[offdiagIdx[r]];
2419     }
2420   }
2421   ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2422   ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2423   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2424   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2425   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2426   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
2429
2430 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2431 {
2432   Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2433   PetscInt n = A->rmap->n;
2434   PetscInt cstart = A->cmap->rstart;
2435   PetscInt *cmap = mat->garray;
2436   PetscInt *diagIdx, *offdiagIdx;
2437   Vec diagV, offdiagV;
2438   PetscScalar *a, *diagA, *offdiagA;
2439   PetscInt r;
2440   PetscErrorCode ierr;
2441
2442   PetscFunctionBegin;
2443   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2444   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2445   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2446   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2447   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2448   ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2449   ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2450   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2451   for (r = 0; r < n; ++r) {
2452     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2453       a[r] = diagA[r];
2454       idx[r] = cstart + diagIdx[r];
2455     } else {
2456       a[r] = offdiagA[r];
2457       idx[r] = cmap[offdiagIdx[r]];
2458     }
2459   }
2460   ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2461   ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2462   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2463   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2464   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2465   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2466   PetscFunctionReturn(0);
2467 }
2468
2469 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2470 {
2471   PetscErrorCode ierr;
2472   Mat *dummy;
2473
2474   PetscFunctionBegin;
2475   ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2476   *newmat = *dummy;
2477   ierr = PetscFree(dummy);CHKERRQ(ierr);
2478   PetscFunctionReturn(0);
2479 }
2480
2481 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2482 {
2483   Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
2484   PetscErrorCode ierr;
2485
2486   PetscFunctionBegin;
2487   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2488   A->factorerrortype = a->A->factorerrortype;
2489   PetscFunctionReturn(0);
2490 }
2491
2492 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2493 {
2494   PetscErrorCode ierr;
2495   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data;
2496
2497   PetscFunctionBegin;
2498
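  /* Delegate to the sequential blocks: fill the diagonal (A) and off-diagonal (B) parts with random values, then assemble the parallel matrix */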
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2499 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2500 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2501 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2502 PetscFunctionReturn(0); 2503 } 2504 2505 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2506 { 2507 PetscFunctionBegin; 2508 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2509 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2510 PetscFunctionReturn(0); 2511 } 2512 2513 /*@ 2514 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2515 2516 Collective on Mat 2517 2518 Input Parameters: 2519 + A - the matrix 2520 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2521 2522 Level: advanced 2523 2524 @*/ 2525 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2526 { 2527 PetscErrorCode ierr; 2528 2529 PetscFunctionBegin; 2530 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2535 { 2536 PetscErrorCode ierr; 2537 PetscBool sc = PETSC_FALSE,flg; 2538 2539 PetscFunctionBegin; 2540 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2541 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2542 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2543 if (flg) { 2544 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2545 } 2546 ierr = PetscOptionsTail();CHKERRQ(ierr); 2547 PetscFunctionReturn(0); 2548 } 2549 2550 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2551 { 2552 PetscErrorCode ierr; 2553 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2554 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2555 2556 PetscFunctionBegin; 2557 if (!Y->preallocated) { 2558 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2559 } else if (!aij->nz) { 2560 PetscInt nonew = aij->nonew; 2561 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2562 aij->nonew = nonew; 2563 } 2564 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2565 PetscFunctionReturn(0); 2566 } 2567 2568 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2569 { 2570 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2571 PetscErrorCode ierr; 2572 2573 PetscFunctionBegin; 2574 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2575 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2576 if (d) { 2577 PetscInt rstart; 2578 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2579 *d += rstart; 2580 2581 } 2582 PetscFunctionReturn(0); 2583 } 2584 2585 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2586 { 2587 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2588 PetscErrorCode ierr; 2589 2590 PetscFunctionBegin; 2591 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2592 PetscFunctionReturn(0); 2593 } 2594 2595 /* -------------------------------------------------------------------*/ 2596 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2597 MatGetRow_MPIAIJ, 2598 
MatRestoreRow_MPIAIJ, 2599 MatMult_MPIAIJ, 2600 /* 4*/ MatMultAdd_MPIAIJ, 2601 MatMultTranspose_MPIAIJ, 2602 MatMultTransposeAdd_MPIAIJ, 2603 0, 2604 0, 2605 0, 2606 /*10*/ 0, 2607 0, 2608 0, 2609 MatSOR_MPIAIJ, 2610 MatTranspose_MPIAIJ, 2611 /*15*/ MatGetInfo_MPIAIJ, 2612 MatEqual_MPIAIJ, 2613 MatGetDiagonal_MPIAIJ, 2614 MatDiagonalScale_MPIAIJ, 2615 MatNorm_MPIAIJ, 2616 /*20*/ MatAssemblyBegin_MPIAIJ, 2617 MatAssemblyEnd_MPIAIJ, 2618 MatSetOption_MPIAIJ, 2619 MatZeroEntries_MPIAIJ, 2620 /*24*/ MatZeroRows_MPIAIJ, 2621 0, 2622 0, 2623 0, 2624 0, 2625 /*29*/ MatSetUp_MPIAIJ, 2626 0, 2627 0, 2628 MatGetDiagonalBlock_MPIAIJ, 2629 0, 2630 /*34*/ MatDuplicate_MPIAIJ, 2631 0, 2632 0, 2633 0, 2634 0, 2635 /*39*/ MatAXPY_MPIAIJ, 2636 MatCreateSubMatrices_MPIAIJ, 2637 MatIncreaseOverlap_MPIAIJ, 2638 MatGetValues_MPIAIJ, 2639 MatCopy_MPIAIJ, 2640 /*44*/ MatGetRowMax_MPIAIJ, 2641 MatScale_MPIAIJ, 2642 MatShift_MPIAIJ, 2643 MatDiagonalSet_MPIAIJ, 2644 MatZeroRowsColumns_MPIAIJ, 2645 /*49*/ MatSetRandom_MPIAIJ, 2646 0, 2647 0, 2648 0, 2649 0, 2650 /*54*/ MatFDColoringCreate_MPIXAIJ, 2651 0, 2652 MatSetUnfactored_MPIAIJ, 2653 MatPermute_MPIAIJ, 2654 0, 2655 /*59*/ MatCreateSubMatrix_MPIAIJ, 2656 MatDestroy_MPIAIJ, 2657 MatView_MPIAIJ, 2658 0, 2659 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2660 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2661 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2662 0, 2663 0, 2664 0, 2665 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2666 MatGetRowMinAbs_MPIAIJ, 2667 0, 2668 0, 2669 0, 2670 0, 2671 /*75*/ MatFDColoringApply_AIJ, 2672 MatSetFromOptions_MPIAIJ, 2673 0, 2674 0, 2675 MatFindZeroDiagonals_MPIAIJ, 2676 /*80*/ 0, 2677 0, 2678 0, 2679 /*83*/ MatLoad_MPIAIJ, 2680 MatIsSymmetric_MPIAIJ, 2681 0, 2682 0, 2683 0, 2684 0, 2685 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2686 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2687 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2688 MatPtAP_MPIAIJ_MPIAIJ, 2689 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2690 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2691 0, 2692 0, 2693 0, 2694 0, 2695 /*99*/ 0, 2696 0, 2697 0, 2698 MatConjugate_MPIAIJ, 2699 0, 2700 /*104*/MatSetValuesRow_MPIAIJ, 2701 MatRealPart_MPIAIJ, 2702 MatImaginaryPart_MPIAIJ, 2703 0, 2704 0, 2705 /*109*/0, 2706 0, 2707 MatGetRowMin_MPIAIJ, 2708 0, 2709 MatMissingDiagonal_MPIAIJ, 2710 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2711 0, 2712 MatGetGhosts_MPIAIJ, 2713 0, 2714 0, 2715 /*119*/0, 2716 0, 2717 0, 2718 0, 2719 MatGetMultiProcBlock_MPIAIJ, 2720 /*124*/MatFindNonzeroRows_MPIAIJ, 2721 MatGetColumnNorms_MPIAIJ, 2722 MatInvertBlockDiagonal_MPIAIJ, 2723 MatInvertVariableBlockDiagonal_MPIAIJ, 2724 MatCreateSubMatricesMPI_MPIAIJ, 2725 /*129*/0, 2726 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2727 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2728 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2729 0, 2730 /*134*/0, 2731 0, 2732 MatRARt_MPIAIJ_MPIAIJ, 2733 0, 2734 0, 2735 /*139*/MatSetBlockSizes_MPIAIJ, 2736 0, 2737 0, 2738 MatFDColoringSetUp_MPIXAIJ, 2739 MatFindOffBlockDiagonalEntries_MPIAIJ, 2740 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2741 }; 2742 2743 /* ----------------------------------------------------------------------------------------*/ 2744 2745 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2746 { 2747 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2748 PetscErrorCode ierr; 2749 2750 PetscFunctionBegin; 2751 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2752 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2753 PetscFunctionReturn(0); 2754 } 2755 2756 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2757 { 2758 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2759 PetscErrorCode ierr; 2760 2761 PetscFunctionBegin; 2762 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2763 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2764 PetscFunctionReturn(0); 2765 } 2766 2767 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2768 { 2769 Mat_MPIAIJ *b; 2770 PetscErrorCode ierr; 2771 2772 PetscFunctionBegin; 2773 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2774 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2775 b = (Mat_MPIAIJ*)B->data; 2776 2777 #if defined(PETSC_USE_CTABLE) 2778 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2779 #else 2780 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2781 #endif 2782 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2783 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2784 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2785 2786 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2787 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2788 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2789 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2790 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2791 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2792 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2793 2794 if (!B->preallocated) { 2795 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2796 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2797 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2798 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2799 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2800 } 2801 2802 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2803 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2804 B->preallocated = PETSC_TRUE; 2805 B->was_assembled = PETSC_FALSE; 2806 B->assembled = PETSC_FALSE;; 2807 PetscFunctionReturn(0); 2808 } 2809 2810 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2811 { 2812 Mat_MPIAIJ *b; 2813 PetscErrorCode ierr; 2814 2815 PetscFunctionBegin; 2816 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2817 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2818 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2819 b = (Mat_MPIAIJ*)B->data; 2820 2821 #if defined(PETSC_USE_CTABLE) 2822 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2823 #else 2824 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2825 #endif 2826 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2827 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2828 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2829 2830 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2831 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2832 B->preallocated = PETSC_TRUE; 2833 B->was_assembled = PETSC_FALSE; 2834 B->assembled = PETSC_FALSE; 2835 PetscFunctionReturn(0); 2836 } 2837 2838 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2839 { 2840 Mat mat; 2841 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2842 PetscErrorCode ierr; 2843 2844 PetscFunctionBegin; 2845 *newmat = 0; 2846 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2847 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2848 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2849 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 
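  /* Copy the function table of the original matrix, then replicate its per-process state (flags, column map, ghost column array, scatter contexts) before duplicating the sequential blocks */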
2850 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2851 a = (Mat_MPIAIJ*)mat->data; 2852 2853 mat->factortype = matin->factortype; 2854 mat->assembled = PETSC_TRUE; 2855 mat->insertmode = NOT_SET_VALUES; 2856 mat->preallocated = PETSC_TRUE; 2857 2858 a->size = oldmat->size; 2859 a->rank = oldmat->rank; 2860 a->donotstash = oldmat->donotstash; 2861 a->roworiented = oldmat->roworiented; 2862 a->rowindices = 0; 2863 a->rowvalues = 0; 2864 a->getrowactive = PETSC_FALSE; 2865 2866 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2867 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2868 2869 if (oldmat->colmap) { 2870 #if defined(PETSC_USE_CTABLE) 2871 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2872 #else 2873 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2874 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2875 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2876 #endif 2877 } else a->colmap = 0; 2878 if (oldmat->garray) { 2879 PetscInt len; 2880 len = oldmat->B->cmap->n; 2881 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2882 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2883 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2884 } else a->garray = 0; 2885 2886 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2887 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2888 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2889 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2890 2891 if (oldmat->Mvctx_mpi1) { 2892 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2893 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2894 } 2895 2896 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2897 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2898 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2899 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2900 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2901 *newmat = mat; 2902 PetscFunctionReturn(0); 2903 } 2904 2905 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2906 { 2907 PetscScalar *vals,*svals; 2908 MPI_Comm comm; 2909 PetscErrorCode ierr; 2910 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2911 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2912 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2913 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2914 PetscInt cend,cstart,n,*rowners; 2915 int fd; 2916 PetscInt bs = newMat->rmap->bs; 2917 2918 PetscFunctionBegin; 2919 /* force binary viewer to load .info file if it has not yet done so */ 2920 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2921 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2922 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2923 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2924 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2925 if (!rank) { 2926 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2927 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2928 if 
(header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2929 } 2930 2931 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2932 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2933 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2934 if (bs < 0) bs = 1; 2935 2936 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2937 M = header[1]; N = header[2]; 2938 2939 /* If global sizes are set, check if they are consistent with that given in the file */ 2940 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2941 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2942 2943 /* determine ownership of all (block) rows */ 2944 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2945 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2946 else m = newMat->rmap->n; /* Set by user */ 2947 2948 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2949 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2950 2951 /* First process needs enough room for process with most rows */ 2952 if (!rank) { 2953 mmax = rowners[1]; 2954 for (i=2; i<=size; i++) { 2955 mmax = PetscMax(mmax, rowners[i]); 2956 } 2957 } else mmax = -1; /* unused, but compilers complain */ 2958 2959 rowners[0] = 0; 2960 for (i=2; i<=size; i++) { 2961 rowners[i] += rowners[i-1]; 2962 } 2963 rstart = rowners[rank]; 2964 rend = rowners[rank+1]; 2965 2966 /* distribute row lengths to all processors */ 2967 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2968 if (!rank) { 2969 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2970 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2971 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2972 for (j=0; j<m; j++) { 2973 procsnz[0] += ourlens[j]; 2974 } 2975 for (i=1; i<size; i++) { 2976 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2977 /* calculate the number of nonzeros on each processor */ 2978 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2979 procsnz[i] += rowlengths[j]; 2980 } 2981 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2982 } 2983 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2984 } else { 2985 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2986 } 2987 2988 if (!rank) { 2989 /* determine max buffer needed and allocate it */ 2990 maxnz = 0; 2991 for (i=0; i<size; i++) { 2992 maxnz = PetscMax(maxnz,procsnz[i]); 2993 } 2994 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2995 2996 /* read in my part of the matrix column indices */ 2997 nz = procsnz[0]; 2998 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2999 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3000 3001 /* read in every one elses and ship off */ 3002 for (i=1; i<size; i++) { 3003 nz = procsnz[i]; 3004 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3005 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3006 } 3007 ierr = 
PetscFree(cols);CHKERRQ(ierr); 3008 } else { 3009 /* determine buffer space needed for message */ 3010 nz = 0; 3011 for (i=0; i<m; i++) { 3012 nz += ourlens[i]; 3013 } 3014 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3015 3016 /* receive message of column indices*/ 3017 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3018 } 3019 3020 /* determine column ownership if matrix is not square */ 3021 if (N != M) { 3022 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3023 else n = newMat->cmap->n; 3024 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3025 cstart = cend - n; 3026 } else { 3027 cstart = rstart; 3028 cend = rend; 3029 n = cend - cstart; 3030 } 3031 3032 /* loop over local rows, determining number of off diagonal entries */ 3033 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3034 jj = 0; 3035 for (i=0; i<m; i++) { 3036 for (j=0; j<ourlens[i]; j++) { 3037 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3038 jj++; 3039 } 3040 } 3041 3042 for (i=0; i<m; i++) { 3043 ourlens[i] -= offlens[i]; 3044 } 3045 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3046 3047 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3048 3049 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3050 3051 for (i=0; i<m; i++) { 3052 ourlens[i] += offlens[i]; 3053 } 3054 3055 if (!rank) { 3056 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3057 3058 /* read in my part of the matrix numerical values */ 3059 nz = procsnz[0]; 3060 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3061 3062 /* insert into matrix */ 3063 jj = rstart; 3064 smycols = mycols; 3065 svals = vals; 3066 for (i=0; i<m; i++) { 3067 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3068 smycols += ourlens[i]; 3069 svals += ourlens[i]; 3070 jj++; 3071 } 3072 3073 /* read in other processors and ship out */ 3074 for (i=1; i<size; i++) { 3075 nz = procsnz[i]; 3076 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3077 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3078 } 3079 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3080 } else { 3081 /* receive numeric values */ 3082 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3083 3084 /* receive message of values*/ 3085 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3086 3087 /* insert into matrix */ 3088 jj = rstart; 3089 smycols = mycols; 3090 svals = vals; 3091 for (i=0; i<m; i++) { 3092 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3093 smycols += ourlens[i]; 3094 svals += ourlens[i]; 3095 jj++; 3096 } 3097 } 3098 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3099 ierr = PetscFree(vals);CHKERRQ(ierr); 3100 ierr = PetscFree(mycols);CHKERRQ(ierr); 3101 ierr = PetscFree(rowners);CHKERRQ(ierr); 3102 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3103 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3104 PetscFunctionReturn(0); 3105 } 3106 3107 /* Not scalable because of ISAllGather() unless getting all columns. 
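   ISAllGather() replicates the entire column index set on every process, so its memory and
   communication costs grow with the global size of iscol rather than with its local size.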
*/ 3108 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3109 { 3110 PetscErrorCode ierr; 3111 IS iscol_local; 3112 PetscBool isstride; 3113 PetscMPIInt lisstride=0,gisstride; 3114 3115 PetscFunctionBegin; 3116 /* check if we are grabbing all columns*/ 3117 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3118 3119 if (isstride) { 3120 PetscInt start,len,mstart,mlen; 3121 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3122 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3123 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3124 if (mstart == start && mlen-mstart == len) lisstride = 1; 3125 } 3126 3127 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3128 if (gisstride) { 3129 PetscInt N; 3130 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3131 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3132 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3133 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3134 } else { 3135 PetscInt cbs; 3136 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3137 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3138 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3139 } 3140 3141 *isseq = iscol_local; 3142 PetscFunctionReturn(0); 3143 } 3144 3145 /* 3146 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3147 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3148 3149 Input Parameters: 3150 mat - matrix 3151 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3152 i.e., mat->rstart <= isrow[i] < mat->rend 3153 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3154 i.e., mat->cstart <= iscol[i] < mat->cend 3155 Output Parameter: 3156 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3157 iscol_o - sequential column index set for retrieving mat->B 3158 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3159 */ 3160 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3161 { 3162 PetscErrorCode ierr; 3163 Vec x,cmap; 3164 const PetscInt *is_idx; 3165 PetscScalar *xarray,*cmaparray; 3166 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3167 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3168 Mat B=a->B; 3169 Vec lvec=a->lvec,lcmap; 3170 PetscInt i,cstart,cend,Bn=B->cmap->N; 3171 MPI_Comm comm; 3172 VecScatter Mvctx=a->Mvctx; 3173 3174 PetscFunctionBegin; 3175 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3176 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3177 3178 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3179 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3180 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3181 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3182 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3183 3184 /* Get start indices */ 3185 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3186 isstart -= ncols; 3187 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3188 3189 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3190 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3191 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3192 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3193 for (i=0; i<ncols; i++) { 3194 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3195 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3196 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3197 } 3198 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3199 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3200 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3201 3202 /* Get iscol_d */ 3203 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3204 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3205 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3206 3207 /* Get isrow_d */ 3208 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3209 rstart = mat->rmap->rstart; 3210 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3211 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3212 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3213 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3214 3215 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3216 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3217 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3218 3219 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3220 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3221 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3222 3223 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3224 3225 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3226 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3227 3228 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3229 /* off-process column indices */ 3230 count = 0; 3231 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3232 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3233 3234 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3235 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3236 for (i=0; i<Bn; i++) { 3237 if (PetscRealPart(xarray[i]) > -1.0) { 3238 idx[count] = i; /* local column index in off-diagonal part B */ 3239 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3240 count++; 3241 } 3242 } 3243 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3244 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3245 3246 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3247 /* cannot ensure iscol_o has same blocksize as iscol! 
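      (the entries of one block of iscol may be split between the on-process set iscol_d and the
      off-process set iscol_o, so the block structure of iscol is not preserved)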
*/ 3248 3249 ierr = PetscFree(idx);CHKERRQ(ierr); 3250 *garray = cmap1; 3251 3252 ierr = VecDestroy(&x);CHKERRQ(ierr); 3253 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3254 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3255 PetscFunctionReturn(0); 3256 } 3257 3258 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3259 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3260 { 3261 PetscErrorCode ierr; 3262 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3263 Mat M = NULL; 3264 MPI_Comm comm; 3265 IS iscol_d,isrow_d,iscol_o; 3266 Mat Asub = NULL,Bsub = NULL; 3267 PetscInt n; 3268 3269 PetscFunctionBegin; 3270 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3271 3272 if (call == MAT_REUSE_MATRIX) { 3273 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3274 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3275 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3276 3277 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3278 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3279 3280 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3281 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3282 3283 /* Update diagonal and off-diagonal portions of submat */ 3284 asub = (Mat_MPIAIJ*)(*submat)->data; 3285 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3286 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3287 if (n) { 3288 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3289 } 3290 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3291 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3292 3293 } else { /* call == MAT_INITIAL_MATRIX) */ 3294 const PetscInt *garray; 3295 PetscInt BsubN; 3296 3297 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3298 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3299 3300 /* Create local submatrices Asub and Bsub */ 3301 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3302 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3303 3304 /* Create submatrix M */ 3305 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3306 3307 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3308 asub = (Mat_MPIAIJ*)M->data; 3309 3310 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3311 n = asub->B->cmap->N; 3312 if (BsubN > n) { 3313 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3314 const PetscInt *idx; 3315 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3316 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3317 3318 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3319 j = 0; 3320 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3321 for (i=0; i<n; i++) { 3322 if (j >= BsubN) break; 3323 while (subgarray[i] > garray[j]) j++; 3324 3325 if (subgarray[i] == garray[j]) { 3326 idx_new[i] = idx[j++]; 3327 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3328 } 3329 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3330 3331 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3332 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3333 3334 } else if (BsubN < n) { 3335 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3336 } 3337 3338 ierr = PetscFree(garray);CHKERRQ(ierr); 3339 *submat = M; 3340 3341 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3342 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3343 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3344 3345 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3346 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3347 3348 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3349 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3350 } 3351 PetscFunctionReturn(0); 3352 } 3353 3354 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3355 { 3356 PetscErrorCode ierr; 3357 IS iscol_local=NULL,isrow_d; 3358 PetscInt csize; 3359 PetscInt n,i,j,start,end; 3360 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3361 MPI_Comm comm; 3362 3363 PetscFunctionBegin; 3364 /* If isrow has same processor distribution as mat, 3365 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3366 if (call == MAT_REUSE_MATRIX) { 3367 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3368 if (isrow_d) { 3369 sameRowDist = PETSC_TRUE; 3370 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3371 } else { 3372 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3373 if (iscol_local) { 3374 sameRowDist = PETSC_TRUE; 3375 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3376 } 3377 } 3378 } else { 3379 /* Check if isrow has same processor distribution as mat */ 3380 sameDist[0] 
= PETSC_FALSE; 3381 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3382 if (!n) { 3383 sameDist[0] = PETSC_TRUE; 3384 } else { 3385 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3386 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3387 if (i >= start && j < end) { 3388 sameDist[0] = PETSC_TRUE; 3389 } 3390 } 3391 3392 /* Check if iscol has same processor distribution as mat */ 3393 sameDist[1] = PETSC_FALSE; 3394 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3395 if (!n) { 3396 sameDist[1] = PETSC_TRUE; 3397 } else { 3398 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3399 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3400 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3401 } 3402 3403 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3404 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3405 sameRowDist = tsameDist[0]; 3406 } 3407 3408 if (sameRowDist) { 3409 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3410 /* isrow and iscol have same processor distribution as mat */ 3411 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3412 PetscFunctionReturn(0); 3413 } else { /* sameRowDist */ 3414 /* isrow has same processor distribution as mat */ 3415 if (call == MAT_INITIAL_MATRIX) { 3416 PetscBool sorted; 3417 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3418 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3419 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3420 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3421 3422 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3423 if (sorted) { 3424 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3425 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3426 PetscFunctionReturn(0); 3427 } 3428 } else { /* call == MAT_REUSE_MATRIX */ 3429 IS iscol_sub; 3430 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3431 if (iscol_sub) { 3432 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3433 PetscFunctionReturn(0); 3434 } 3435 } 3436 } 3437 } 3438 3439 /* General case: iscol -> iscol_local which has global size of iscol */ 3440 if (call == MAT_REUSE_MATRIX) { 3441 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3442 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3443 } else { 3444 if (!iscol_local) { 3445 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3446 } 3447 } 3448 3449 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3450 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3451 3452 if (call == MAT_INITIAL_MATRIX) { 3453 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3454 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3455 } 3456 PetscFunctionReturn(0); 3457 } 3458 3459 /*@C 3460 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3461 and "off-diagonal" part of the matrix in CSR format. 3462 3463 Collective on MPI_Comm 3464 3465 Input Parameters: 3466 + comm - MPI communicator 3467 . 
A - "diagonal" portion of matrix 3468 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3469 - garray - global index of B columns 3470 3471 Output Parameter: 3472 . mat - the matrix, with input A as its local diagonal matrix 3473 Level: advanced 3474 3475 Notes: 3476 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3477 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3478 3479 .seealso: MatCreateMPIAIJWithSplitArrays() 3480 @*/ 3481 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3482 { 3483 PetscErrorCode ierr; 3484 Mat_MPIAIJ *maij; 3485 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3486 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3487 PetscScalar *oa=b->a; 3488 Mat Bnew; 3489 PetscInt m,n,N; 3490 3491 PetscFunctionBegin; 3492 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3493 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3494 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3495 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3496 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3497 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3498 3499 /* Get global columns of mat */ 3500 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3501 3502 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3503 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3504 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3505 maij = (Mat_MPIAIJ*)(*mat)->data; 3506 3507 (*mat)->preallocated = PETSC_TRUE; 3508 3509 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3510 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3511 3512 /* Set A as diagonal portion of *mat */ 3513 maij->A = A; 3514 3515 nz = oi[m]; 3516 for (i=0; i<nz; i++) { 3517 col = oj[i]; 3518 oj[i] = garray[col]; 3519 } 3520 3521 /* Set Bnew as off-diagonal portion of *mat */ 3522 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3523 bnew = (Mat_SeqAIJ*)Bnew->data; 3524 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3525 maij->B = Bnew; 3526 3527 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3528 3529 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3530 b->free_a = PETSC_FALSE; 3531 b->free_ij = PETSC_FALSE; 3532 ierr = MatDestroy(&B);CHKERRQ(ierr); 3533 3534 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3535 bnew->free_a = PETSC_TRUE; 3536 bnew->free_ij = PETSC_TRUE; 3537 3538 /* condense columns of maij->B */ 3539 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3540 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3541 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3542 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3543 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3544 PetscFunctionReturn(0); 3545 } 3546 3547 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3548 
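/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - extracts mat[isrow,iscol] when isrow has the same row
   distribution as mat, so no matrix rows need to be moved between processes.

   iscol_local is a sequential IS containing all selected columns (it must be sorted and may contain
   duplicate indices); it is only used for MAT_INITIAL_MATRIX and may be NULL on reuse.

   For MAT_INITIAL_MATRIX a local sequential submatrix Msub is built over the selected columns that
   are visible to this process, and the parallel *newmat is then assembled from Msub with
   MatSetValues_MPIAIJ(). Msub and the supporting index sets are composed on *newmat as
   "SubMatrix", "SubIScol" and "Subcmap" so that MAT_REUSE_MATRIX can refill the same structure.
*/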
3549 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3550 { 3551 PetscErrorCode ierr; 3552 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3553 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3554 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3555 Mat M,Msub,B=a->B; 3556 MatScalar *aa; 3557 Mat_SeqAIJ *aij; 3558 PetscInt *garray = a->garray,*colsub,Ncols; 3559 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3560 IS iscol_sub,iscmap; 3561 const PetscInt *is_idx,*cmap; 3562 PetscBool allcolumns=PETSC_FALSE; 3563 MPI_Comm comm; 3564 3565 PetscFunctionBegin; 3566 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3567 3568 if (call == MAT_REUSE_MATRIX) { 3569 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3570 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3571 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3572 3573 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3574 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3575 3576 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3577 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3578 3579 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3580 3581 } else { /* call == MAT_INITIAL_MATRIX) */ 3582 PetscBool flg; 3583 3584 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3585 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3586 3587 /* (1) iscol -> nonscalable iscol_local */ 3588 /* Check for special case: each processor gets entire matrix columns */ 3589 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3590 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3591 if (allcolumns) { 3592 iscol_sub = iscol_local; 3593 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3594 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3595 3596 } else { 3597 /* (2) iscol_local -> iscol_sub and iscmap. 
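          iscol_sub keeps only the selected columns that are visible to this process (those in its
          diagonal block plus the off-process columns recorded in garray), and iscmap records, for
          each retained column, its position within the submatrix.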
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3598 PetscInt *idx,*cmap1,k; 3599 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3600 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3601 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3602 count = 0; 3603 k = 0; 3604 for (i=0; i<Ncols; i++) { 3605 j = is_idx[i]; 3606 if (j >= cstart && j < cend) { 3607 /* diagonal part of mat */ 3608 idx[count] = j; 3609 cmap1[count++] = i; /* column index in submat */ 3610 } else if (Bn) { 3611 /* off-diagonal part of mat */ 3612 if (j == garray[k]) { 3613 idx[count] = j; 3614 cmap1[count++] = i; /* column index in submat */ 3615 } else if (j > garray[k]) { 3616 while (j > garray[k] && k < Bn-1) k++; 3617 if (j == garray[k]) { 3618 idx[count] = j; 3619 cmap1[count++] = i; /* column index in submat */ 3620 } 3621 } 3622 } 3623 } 3624 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3625 3626 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3627 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3628 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3629 3630 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3631 } 3632 3633 /* (3) Create sequential Msub */ 3634 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3635 } 3636 3637 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3638 aij = (Mat_SeqAIJ*)(Msub)->data; 3639 ii = aij->i; 3640 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3641 3642 /* 3643 m - number of local rows 3644 Ncols - number of columns (same on all processors) 3645 rstart - first row in new global matrix generated 3646 */ 3647 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3648 3649 if (call == MAT_INITIAL_MATRIX) { 3650 /* (4) Create parallel newmat */ 3651 PetscMPIInt rank,size; 3652 PetscInt csize; 3653 3654 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3655 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3656 3657 /* 3658 Determine the number of non-zeros in the diagonal and off-diagonal 3659 portions of the matrix in order to do correct preallocation 3660 */ 3661 3662 /* first get start and end of "diagonal" columns */ 3663 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3664 if (csize == PETSC_DECIDE) { 3665 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3666 if (mglobal == Ncols) { /* square matrix */ 3667 nlocal = m; 3668 } else { 3669 nlocal = Ncols/size + ((Ncols % size) > rank); 3670 } 3671 } else { 3672 nlocal = csize; 3673 } 3674 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3675 rstart = rend - nlocal; 3676 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3677 3678 /* next, compute all the lengths */ 3679 jj = aij->j; 3680 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3681 olens = dlens + m; 3682 for (i=0; i<m; i++) { 3683 jend = ii[i+1] - ii[i]; 3684 olen = 0; 3685 dlen = 0; 3686 for (j=0; j<jend; j++) { 3687 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3688 else dlen++; 3689 jj++; 3690 } 3691 olens[i] = olen; 3692 dlens[i] = dlen; 3693 } 3694 3695 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3696 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3697 3698 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3699 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3700 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3701 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3702 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3703 ierr = PetscFree(dlens);CHKERRQ(ierr); 3704 3705 } else { /* call == MAT_REUSE_MATRIX */ 3706 M = *newmat; 3707 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3708 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3709 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3710 /* 3711 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3712 rather than the slower MatSetValues(). 3713 */ 3714 M->was_assembled = PETSC_TRUE; 3715 M->assembled = PETSC_FALSE; 3716 } 3717 3718 /* (5) Set values of Msub to *newmat */ 3719 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3720 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3721 3722 jj = aij->j; 3723 aa = aij->a; 3724 for (i=0; i<m; i++) { 3725 row = rstart + i; 3726 nz = ii[i+1] - ii[i]; 3727 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3728 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3729 jj += nz; aa += nz; 3730 } 3731 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3732 3733 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3734 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3735 3736 ierr = PetscFree(colsub);CHKERRQ(ierr); 3737 3738 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3739 if (call == MAT_INITIAL_MATRIX) { 3740 *newmat = M; 3741 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3742 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3743 3744 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3745 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3746 3747 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3748 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3749 3750 if (iscol_local) { 3751 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3752 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3753 } 3754 } 3755 PetscFunctionReturn(0); 3756 } 3757 3758 /* 3759 Not great since it makes two copies of the submatrix, first an SeqAIJ 3760 in local and then by concatenating the local matrices the end result. 3761 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3762 3763 Note: This requires a sequential iscol with all indices. 
3764 */ 3765 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3766 { 3767 PetscErrorCode ierr; 3768 PetscMPIInt rank,size; 3769 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3770 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3771 Mat M,Mreuse; 3772 MatScalar *aa,*vwork; 3773 MPI_Comm comm; 3774 Mat_SeqAIJ *aij; 3775 PetscBool colflag,allcolumns=PETSC_FALSE; 3776 3777 PetscFunctionBegin; 3778 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3779 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3780 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3781 3782 /* Check for special case: each processor gets entire matrix columns */ 3783 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3784 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3785 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3786 3787 if (call == MAT_REUSE_MATRIX) { 3788 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3789 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3790 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3791 } else { 3792 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3793 } 3794 3795 /* 3796 m - number of local rows 3797 n - number of columns (same on all processors) 3798 rstart - first row in new global matrix generated 3799 */ 3800 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3801 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3802 if (call == MAT_INITIAL_MATRIX) { 3803 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3804 ii = aij->i; 3805 jj = aij->j; 3806 3807 /* 3808 Determine the number of non-zeros in the diagonal and off-diagonal 3809 portions of the matrix in order to do correct preallocation 3810 */ 3811 3812 /* first get start and end of "diagonal" columns */ 3813 if (csize == PETSC_DECIDE) { 3814 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3815 if (mglobal == n) { /* square matrix */ 3816 nlocal = m; 3817 } else { 3818 nlocal = n/size + ((n % size) > rank); 3819 } 3820 } else { 3821 nlocal = csize; 3822 } 3823 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3824 rstart = rend - nlocal; 3825 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3826 3827 /* next, compute all the lengths */ 3828 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3829 olens = dlens + m; 3830 for (i=0; i<m; i++) { 3831 jend = ii[i+1] - ii[i]; 3832 olen = 0; 3833 dlen = 0; 3834 for (j=0; j<jend; j++) { 3835 if (*jj < rstart || *jj >= rend) olen++; 3836 else dlen++; 3837 jj++; 3838 } 3839 olens[i] = olen; 3840 dlens[i] = dlen; 3841 } 3842 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3843 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3844 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3845 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3846 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3847 ierr = PetscFree(dlens);CHKERRQ(ierr); 3848 } else { 3849 PetscInt ml,nl; 3850 3851 M = *newmat; 3852 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3853 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3854 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3855 /* 3856 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3857 rather than the slower MatSetValues(). 3858 */ 3859 M->was_assembled = PETSC_TRUE; 3860 M->assembled = PETSC_FALSE; 3861 } 3862 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3863 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3864 ii = aij->i; 3865 jj = aij->j; 3866 aa = aij->a; 3867 for (i=0; i<m; i++) { 3868 row = rstart + i; 3869 nz = ii[i+1] - ii[i]; 3870 cwork = jj; jj += nz; 3871 vwork = aa; aa += nz; 3872 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3873 } 3874 3875 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3876 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3877 *newmat = M; 3878 3879 /* save submatrix used in processor for next request */ 3880 if (call == MAT_INITIAL_MATRIX) { 3881 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3882 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3883 } 3884 PetscFunctionReturn(0); 3885 } 3886 3887 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3888 { 3889 PetscInt m,cstart, cend,j,nnz,i,d; 3890 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3891 const PetscInt *JJ; 3892 PetscScalar *values; 3893 PetscErrorCode ierr; 3894 PetscBool nooffprocentries; 3895 3896 PetscFunctionBegin; 3897 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3898 3899 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3900 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3901 m = B->rmap->n; 3902 cstart = B->cmap->rstart; 3903 cend = B->cmap->rend; 3904 rstart = B->rmap->rstart; 3905 3906 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3907 3908 #if defined(PETSC_USE_DEBUG) 3909 for (i=0; i<m; i++) { 3910 nnz = Ii[i+1]- Ii[i]; 3911 JJ = J + Ii[i]; 3912 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3913 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3914 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3915 } 3916 #endif 3917 3918 for (i=0; i<m; i++) { 3919 nnz = Ii[i+1]- Ii[i]; 3920 JJ = J + Ii[i]; 3921 nnz_max = PetscMax(nnz_max,nnz); 3922 d = 0; 3923 for (j=0; j<nnz; j++) { 3924 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3925 } 3926 d_nnz[i] = d; 3927 o_nnz[i] = nnz - d; 3928 } 3929 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3930 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3931 3932 if (v) values = (PetscScalar*)v; 3933 else { 3934 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3935 } 3936 3937 for (i=0; i<m; i++) { 3938 ii = i + rstart; 3939 nnz = Ii[i+1]- Ii[i]; 3940 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3941 } 3942 nooffprocentries = B->nooffprocentries; 3943 B->nooffprocentries = PETSC_TRUE; 3944 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3945 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3946 B->nooffprocentries = nooffprocentries; 3947 3948 if (!v) { 3949 ierr = PetscFree(values);CHKERRQ(ierr); 3950 } 3951 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3952 PetscFunctionReturn(0); 3953 } 3954 3955 /*@ 3956 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3957 (the default parallel PETSc format). 3958 3959 Collective on MPI_Comm 3960 3961 Input Parameters: 3962 + B - the matrix 3963 . i - the indices into j for the start of each local row (starts with zero) 3964 . j - the column indices for each local row (starts with zero) 3965 - v - optional values in the matrix 3966 3967 Level: developer 3968 3969 Notes: 3970 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3971 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3972 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3973 3974 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3975 3976 The format which is used for the sparse matrix input, is equivalent to a 3977 row-major ordering.. i.e for the following matrix, the input data expected is 3978 as shown 3979 3980 $ 1 0 0 3981 $ 2 0 3 P0 3982 $ ------- 3983 $ 4 5 6 P1 3984 $ 3985 $ Process0 [P0]: rows_owned=[0,1] 3986 $ i = {0,1,3} [size = nrow+1 = 2+1] 3987 $ j = {0,0,2} [size = 3] 3988 $ v = {1,2,3} [size = 3] 3989 $ 3990 $ Process1 [P1]: rows_owned=[2] 3991 $ i = {0,3} [size = nrow+1 = 1+1] 3992 $ j = {0,1,2} [size = 3] 3993 $ v = {4,5,6} [size = 3] 3994 3995 .keywords: matrix, aij, compressed row, sparse, parallel 3996 3997 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3998 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3999 @*/ 4000 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4001 { 4002 PetscErrorCode ierr; 4003 4004 PetscFunctionBegin; 4005 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4006 PetscFunctionReturn(0); 4007 } 4008 4009 /*@C 4010 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4011 (the default parallel PETSc format). For good matrix assembly performance 4012 the user should preallocate the matrix storage by setting the parameters 4013 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4014 performance can be increased by more than a factor of 50. 4015 4016 Collective on MPI_Comm 4017 4018 Input Parameters: 4019 + B - the matrix 4020 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4021 (same value is used for all local rows) 4022 . d_nnz - array containing the number of nonzeros in the various rows of the 4023 DIAGONAL portion of the local submatrix (possibly different for each row) 4024 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4025 The size of this array is equal to the number of local rows, i.e 'm'. 
4026 For matrices that will be factored, you must leave room for (and set) 4027 the diagonal entry even if it is zero. 4028 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4029 submatrix (same value is used for all local rows). 4030 - o_nnz - array containing the number of nonzeros in the various rows of the 4031 OFF-DIAGONAL portion of the local submatrix (possibly different for 4032 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4033 structure. The size of this array is equal to the number 4034 of local rows, i.e 'm'. 4035 4036 If the *_nnz parameter is given then the *_nz parameter is ignored 4037 4038 The AIJ format (also called the Yale sparse matrix format or 4039 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4040 storage. The stored row and column indices begin with zero. 4041 See Users-Manual: ch_mat for details. 4042 4043 The parallel matrix is partitioned such that the first m0 rows belong to 4044 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4045 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4046 4047 The DIAGONAL portion of the local submatrix of a processor can be defined 4048 as the submatrix which is obtained by extraction the part corresponding to 4049 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4050 first row that belongs to the processor, r2 is the last row belonging to 4051 the this processor, and c1-c2 is range of indices of the local part of a 4052 vector suitable for applying the matrix to. This is an mxn matrix. In the 4053 common case of a square matrix, the row and column ranges are the same and 4054 the DIAGONAL part is also square. The remaining portion of the local 4055 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4056 4057 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4058 4059 You can call MatGetInfo() to get information on how effective the preallocation was; 4060 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4061 You can also run with the option -info and look for messages with the string 4062 malloc in them to see if additional memory allocation was needed. 4063 4064 Example usage: 4065 4066 Consider the following 8x8 matrix with 34 non-zero values, that is 4067 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4068 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4069 as follows: 4070 4071 .vb 4072 1 2 0 | 0 3 0 | 0 4 4073 Proc0 0 5 6 | 7 0 0 | 8 0 4074 9 0 10 | 11 0 0 | 12 0 4075 ------------------------------------- 4076 13 0 14 | 15 16 17 | 0 0 4077 Proc1 0 18 0 | 19 20 21 | 0 0 4078 0 0 0 | 22 23 0 | 24 0 4079 ------------------------------------- 4080 Proc2 25 26 27 | 0 0 28 | 29 0 4081 30 0 0 | 31 32 33 | 0 34 4082 .ve 4083 4084 This can be represented as a collection of submatrices as: 4085 4086 .vb 4087 A B C 4088 D E F 4089 G H I 4090 .ve 4091 4092 Where the submatrices A,B,C are owned by proc0, D,E,F are 4093 owned by proc1, G,H,I are owned by proc2. 4094 4095 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4096 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4097 The 'M','N' parameters are 8,8, and have the same values on all procs. 4098 4099 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4100 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4101 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
4102 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4103 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4104 matrix, ans [DF] as another SeqAIJ matrix. 4105 4106 When d_nz, o_nz parameters are specified, d_nz storage elements are 4107 allocated for every row of the local diagonal submatrix, and o_nz 4108 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4109 One way to choose d_nz and o_nz is to use the max nonzerors per local 4110 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4111 In this case, the values of d_nz,o_nz are: 4112 .vb 4113 proc0 : dnz = 2, o_nz = 2 4114 proc1 : dnz = 3, o_nz = 2 4115 proc2 : dnz = 1, o_nz = 4 4116 .ve 4117 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4118 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4119 for proc3. i.e we are using 12+15+10=37 storage locations to store 4120 34 values. 4121 4122 When d_nnz, o_nnz parameters are specified, the storage is specified 4123 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4124 In the above case the values for d_nnz,o_nnz are: 4125 .vb 4126 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4127 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4128 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4129 .ve 4130 Here the space allocated is sum of all the above values i.e 34, and 4131 hence pre-allocation is perfect. 4132 4133 Level: intermediate 4134 4135 .keywords: matrix, aij, compressed row, sparse, parallel 4136 4137 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4138 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4139 @*/ 4140 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4141 { 4142 PetscErrorCode ierr; 4143 4144 PetscFunctionBegin; 4145 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4146 PetscValidType(B,1); 4147 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4148 PetscFunctionReturn(0); 4149 } 4150 4151 /*@ 4152 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4153 CSR format the local rows. 4154 4155 Collective on MPI_Comm 4156 4157 Input Parameters: 4158 + comm - MPI communicator 4159 . m - number of local rows (Cannot be PETSC_DECIDE) 4160 . n - This value should be the same as the local size used in creating the 4161 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4162 calculated if N is given) For square matrices n is almost always m. 4163 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4164 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4165 . i - row indices 4166 . j - column indices 4167 - a - matrix values 4168 4169 Output Parameter: 4170 . mat - the matrix 4171 4172 Level: intermediate 4173 4174 Notes: 4175 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4176 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4177 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4178 4179 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
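   A sketch of the corresponding calls for the two-process layout shown below (the array names
   i, j and v are illustrative; each process passes only the rows it owns):
.vb
      /* process 0: owns rows 0 and 1 of the 3x3 example below */
      MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&mat);
      /* process 1: owns row 2 */
      MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i,j,v,&mat);
.ve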
4180 4181 The format which is used for the sparse matrix input, is equivalent to a 4182 row-major ordering.. i.e for the following matrix, the input data expected is 4183 as shown 4184 4185 $ 1 0 0 4186 $ 2 0 3 P0 4187 $ ------- 4188 $ 4 5 6 P1 4189 $ 4190 $ Process0 [P0]: rows_owned=[0,1] 4191 $ i = {0,1,3} [size = nrow+1 = 2+1] 4192 $ j = {0,0,2} [size = 3] 4193 $ v = {1,2,3} [size = 3] 4194 $ 4195 $ Process1 [P1]: rows_owned=[2] 4196 $ i = {0,3} [size = nrow+1 = 1+1] 4197 $ j = {0,1,2} [size = 3] 4198 $ v = {4,5,6} [size = 3] 4199 4200 .keywords: matrix, aij, compressed row, sparse, parallel 4201 4202 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4203 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4204 @*/ 4205 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4206 { 4207 PetscErrorCode ierr; 4208 4209 PetscFunctionBegin; 4210 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4211 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4212 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4213 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4214 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4215 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4216 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4217 PetscFunctionReturn(0); 4218 } 4219 4220 /*@C 4221 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4222 (the default parallel PETSc format). For good matrix assembly performance 4223 the user should preallocate the matrix storage by setting the parameters 4224 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4225 performance can be increased by more than a factor of 50. 4226 4227 Collective on MPI_Comm 4228 4229 Input Parameters: 4230 + comm - MPI communicator 4231 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4232 This value should be the same as the local size used in creating the 4233 y vector for the matrix-vector product y = Ax. 4234 . n - This value should be the same as the local size used in creating the 4235 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4236 calculated if N is given) For square matrices n is almost always m. 4237 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4238 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4239 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4240 (same value is used for all local rows) 4241 . d_nnz - array containing the number of nonzeros in the various rows of the 4242 DIAGONAL portion of the local submatrix (possibly different for each row) 4243 or NULL, if d_nz is used to specify the nonzero structure. 4244 The size of this array is equal to the number of local rows, i.e 'm'. 4245 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4246 submatrix (same value is used for all local rows). 4247 - o_nnz - array containing the number of nonzeros in the various rows of the 4248 OFF-DIAGONAL portion of the local submatrix (possibly different for 4249 each row) or NULL, if o_nz is used to specify the nonzero 4250 structure. 
The size of this array is equal to the number
4251          of local rows, i.e. 'm'.
4252 
4253    Output Parameter:
4254 .  A - the matrix
4255 
4256    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4257    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4258    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4259 
4260    Notes:
4261    If the *_nnz parameter is given then the *_nz parameter is ignored
4262 
4263    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4264    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4265    storage requirements for this matrix.
4266 
4267    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4268    processor then it must be used on all processors that share the object for
4269    that argument.
4270 
4271    The user MUST specify either the local or global matrix dimensions
4272    (possibly both).
4273 
4274    The parallel matrix is partitioned across processors such that the
4275    first m0 rows belong to process 0, the next m1 rows belong to
4276    process 1, the next m2 rows belong to process 2 etc., where
4277    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4278    values corresponding to [m x N] submatrix.
4279 
4280    The columns are logically partitioned with the n0 columns belonging
4281    to 0th partition, the next n1 columns belonging to the next
4282    partition etc., where n0,n1,n2... are the input parameter 'n'.
4283 
4284    The DIAGONAL portion of the local submatrix on any given processor
4285    is the submatrix corresponding to the rows and columns m,n
4286    corresponding to the given processor, i.e. the diagonal matrix on
4287    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
4288    etc. The remaining portion of the local submatrix [m x (N-n)]
4289    constitutes the OFF-DIAGONAL portion. The example below better
4290    illustrates this concept.
4291 
4292    For a square global matrix we define each processor's diagonal portion
4293    to be its local rows and the corresponding columns (a square submatrix);
4294    each processor's off-diagonal portion encompasses the remainder of the
4295    local matrix (a rectangular submatrix).
4296 
4297    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4298 
4299    When calling this routine with a single process communicator, a matrix of
4300    type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4301    type of communicator, use the construction mechanism
4302 .vb
4303      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4304 .ve
4305 
4306 $     MatCreate(...,&A);
4307 $     MatSetType(A,MATMPIAIJ);
4308 $     MatSetSizes(A, m,n,M,N);
4309 $     MatMPIAIJSetPreallocation(A,...);
4310 
4311    By default, this format uses inodes (identical nodes) when possible.
4312    We search for consecutive rows with the same nonzero structure, thereby
4313    reusing matrix information to achieve increased efficiency.
4314 
4315    Options Database Keys:
4316 +  -mat_no_inode  - Do not use inodes
4317 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4318 
4319 
4320 
4321    Example usage:
4322 
4323    Consider the following 8x8 matrix with 34 non-zero values, that is
4324    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4325    proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 4326 as follows 4327 4328 .vb 4329 1 2 0 | 0 3 0 | 0 4 4330 Proc0 0 5 6 | 7 0 0 | 8 0 4331 9 0 10 | 11 0 0 | 12 0 4332 ------------------------------------- 4333 13 0 14 | 15 16 17 | 0 0 4334 Proc1 0 18 0 | 19 20 21 | 0 0 4335 0 0 0 | 22 23 0 | 24 0 4336 ------------------------------------- 4337 Proc2 25 26 27 | 0 0 28 | 29 0 4338 30 0 0 | 31 32 33 | 0 34 4339 .ve 4340 4341 This can be represented as a collection of submatrices as 4342 4343 .vb 4344 A B C 4345 D E F 4346 G H I 4347 .ve 4348 4349 Here the submatrices A,B,C are owned by proc0, D,E,F are 4350 owned by proc1, and G,H,I are owned by proc2. 4351 4352 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4353 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4354 The 'M','N' parameters are 8,8, and have the same values on all procs. 4355 4356 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4357 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4358 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4359 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4360 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ 4361 matrix, and [DF] as another SeqAIJ matrix. 4362 4363 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4364 allocated for every row of the local DIAGONAL submatrix, and o_nz 4365 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4366 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4367 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4368 In this case, the values of d_nz,o_nz are 4369 .vb 4370 proc0 : d_nz = 2, o_nz = 2 4371 proc1 : d_nz = 3, o_nz = 2 4372 proc2 : d_nz = 1, o_nz = 4 4373 .ve 4374 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4375 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4376 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4377 34 values. 4378 4379 When the d_nnz, o_nnz parameters are specified, the storage is specified 4380 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4381 In the above case the values for d_nnz,o_nnz are 4382 .vb 4383 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4384 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4385 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4386 .ve 4387 Here the space allocated is the sum of all the above values, i.e., 34, and 4388 hence the preallocation is perfect.
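   As a minimal sketch (error checking and the MatSetValues() calls omitted), the corresponding
   construction on each process, assuming d_nnz and o_nnz hold the per-row counts listed above, is
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,8,8);                        /* m,n are 3,3 on proc0 and proc1, 2,2 on proc2 */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);  /* per-row counts from the example above */
.ve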
4389 4390 Level: intermediate 4391 4392 .keywords: matrix, aij, compressed row, sparse, parallel 4393 4394 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4395 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4396 @*/ 4397 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4398 { 4399 PetscErrorCode ierr; 4400 PetscMPIInt size; 4401 4402 PetscFunctionBegin; 4403 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4404 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4405 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4406 if (size > 1) { 4407 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4408 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4409 } else { 4410 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4411 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4412 } 4413 PetscFunctionReturn(0); 4414 } 4415 4416 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4417 { 4418 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4419 PetscBool flg; 4420 PetscErrorCode ierr; 4421 4422 PetscFunctionBegin; 4423 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4424 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4425 if (Ad) *Ad = a->A; 4426 if (Ao) *Ao = a->B; 4427 if (colmap) *colmap = a->garray; 4428 PetscFunctionReturn(0); 4429 } 4430 4431 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4432 { 4433 PetscErrorCode ierr; 4434 PetscInt m,N,i,rstart,nnz,Ii; 4435 PetscInt *indx; 4436 PetscScalar *values; 4437 4438 PetscFunctionBegin; 4439 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4440 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4441 PetscInt *dnz,*onz,sum,bs,cbs; 4442 4443 if (n == PETSC_DECIDE) { 4444 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4445 } 4446 /* Check sum(n) = N */ 4447 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4448 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4449 4450 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4451 rstart -= m; 4452 4453 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4454 for (i=0; i<m; i++) { 4455 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4456 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4457 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4458 } 4459 4460 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4461 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4462 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4463 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4464 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4465 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4466 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4467 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4468 } 4469 4470 /* numeric phase */ 4471 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4472 for (i=0; i<m; i++) { 4473 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4474 Ii = i + rstart; 4475 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
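/* Ii is the global row index of local row i in *outmat; the row borrowed from inmat is restored immediately after its values are inserted */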
4476 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4477 } 4478 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4479 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4480 PetscFunctionReturn(0); 4481 } 4482 4483 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4484 { 4485 PetscErrorCode ierr; 4486 PetscMPIInt rank; 4487 PetscInt m,N,i,rstart,nnz; 4488 size_t len; 4489 const PetscInt *indx; 4490 PetscViewer out; 4491 char *name; 4492 Mat B; 4493 const PetscScalar *values; 4494 4495 PetscFunctionBegin; 4496 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4497 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4498 /* Should this be the type of the diagonal block of A? */ 4499 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4500 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4501 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4502 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4503 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4504 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4505 for (i=0; i<m; i++) { 4506 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4507 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4508 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4509 } 4510 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4511 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4512 4513 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4514 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4515 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4516 sprintf(name,"%s.%d",outfile,rank); 4517 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4518 ierr = PetscFree(name);CHKERRQ(ierr); 4519 ierr = MatView(B,out);CHKERRQ(ierr); 4520 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4521 ierr = MatDestroy(&B);CHKERRQ(ierr); 4522 PetscFunctionReturn(0); 4523 } 4524 4525 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4526 { 4527 PetscErrorCode ierr; 4528 Mat_Merge_SeqsToMPI *merge; 4529 PetscContainer container; 4530 4531 PetscFunctionBegin; 4532 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4533 if (container) { 4534 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4535 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4536 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4537 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4538 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4539 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4540 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4541 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4542 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4543 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4544 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4545 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4546 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4547 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4548 ierr = PetscFree(merge);CHKERRQ(ierr); 4549 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4550 } 4551 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4552 PetscFunctionReturn(0); 4553 } 4554 4555 #include <../src/mat/utils/freespace.h> 4556 #include <petscbt.h> 4557 4558 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4559 { 4560 PetscErrorCode ierr; 4561 MPI_Comm comm; 4562 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4563 PetscMPIInt 
size,rank,taga,*len_s; 4564 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4565 PetscInt proc,m; 4566 PetscInt **buf_ri,**buf_rj; 4567 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4568 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4569 MPI_Request *s_waits,*r_waits; 4570 MPI_Status *status; 4571 MatScalar *aa=a->a; 4572 MatScalar **abuf_r,*ba_i; 4573 Mat_Merge_SeqsToMPI *merge; 4574 PetscContainer container; 4575 4576 PetscFunctionBegin; 4577 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4578 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4579 4580 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4581 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4582 4583 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4584 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4585 4586 bi = merge->bi; 4587 bj = merge->bj; 4588 buf_ri = merge->buf_ri; 4589 buf_rj = merge->buf_rj; 4590 4591 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4592 owners = merge->rowmap->range; 4593 len_s = merge->len_s; 4594 4595 /* send and recv matrix values */ 4596 /*-----------------------------*/ 4597 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4598 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4599 4600 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4601 for (proc=0,k=0; proc<size; proc++) { 4602 if (!len_s[proc]) continue; 4603 i = owners[proc]; 4604 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4605 k++; 4606 } 4607 4608 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4609 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4610 ierr = PetscFree(status);CHKERRQ(ierr); 4611 4612 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4613 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4614 4615 /* insert mat values of mpimat */ 4616 /*----------------------------*/ 4617 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4618 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4619 4620 for (k=0; k<merge->nrecv; k++) { 4621 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4622 nrows = *(buf_ri_k[k]); 4623 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4624 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4625 } 4626 4627 /* set values of ba */ 4628 m = merge->rowmap->n; 4629 for (i=0; i<m; i++) { 4630 arow = owners[rank] + i; 4631 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4632 bnzi = bi[i+1] - bi[i]; 4633 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4634 4635 /* add local non-zero vals of this proc's seqmat into ba */ 4636 anzi = ai[arow+1] - ai[arow]; 4637 aj = a->j + ai[arow]; 4638 aa = a->a + ai[arow]; 4639 nextaj = 0; 4640 for (j=0; nextaj<anzi; j++) { 4641 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4642 ba_i[j] += aa[nextaj++]; 4643 } 4644 } 4645 4646 /* add received vals into ba */ 4647 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4648 /* i-th row */ 4649 if (i == *nextrow[k]) { 4650 anzi = *(nextai[k]+1) - *nextai[k]; 4651 aj = buf_rj[k] + *(nextai[k]); 4652 aa = abuf_r[k] + *(nextai[k]); 4653 nextaj = 0; 4654 for (j=0; nextaj<anzi; j++) { 4655 if (*(bj_i + j) == aj[nextaj]) { /* bcol 
== acol */ 4656 ba_i[j] += aa[nextaj++]; 4657 } 4658 } 4659 nextrow[k]++; nextai[k]++; 4660 } 4661 } 4662 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4663 } 4664 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4665 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4666 4667 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4668 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4669 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4670 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4671 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4672 PetscFunctionReturn(0); 4673 } 4674 4675 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4676 { 4677 PetscErrorCode ierr; 4678 Mat B_mpi; 4679 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4680 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4681 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4682 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4683 PetscInt len,proc,*dnz,*onz,bs,cbs; 4684 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4685 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4686 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4687 MPI_Status *status; 4688 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4689 PetscBT lnkbt; 4690 Mat_Merge_SeqsToMPI *merge; 4691 PetscContainer container; 4692 4693 PetscFunctionBegin; 4694 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4695 4696 /* make sure it is a PETSc comm */ 4697 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4698 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4699 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4700 4701 ierr = PetscNew(&merge);CHKERRQ(ierr); 4702 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4703 4704 /* determine row ownership */ 4705 /*---------------------------------------------------------*/ 4706 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4707 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4708 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4709 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4710 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4711 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4712 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4713 4714 m = merge->rowmap->n; 4715 owners = merge->rowmap->range; 4716 4717 /* determine the number of messages to send, their lengths */ 4718 /*---------------------------------------------------------*/ 4719 len_s = merge->len_s; 4720 4721 len = 0; /* length of buf_si[] */ 4722 merge->nsend = 0; 4723 for (proc=0; proc<size; proc++) { 4724 len_si[proc] = 0; 4725 if (proc == rank) { 4726 len_s[proc] = 0; 4727 } else { 4728 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4729 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4730 } 4731 if (len_s[proc]) { 4732 merge->nsend++; 4733 nrows = 0; 4734 for (i=owners[proc]; i<owners[proc+1]; i++) { 4735 if (ai[i+1] > ai[i]) nrows++; 4736 } 4737 len_si[proc] = 2*(nrows+1); 4738 len += len_si[proc]; 4739 } 4740 } 4741 4742 /* determine the number and length of messages to receive for ij-structure */ 4743 /*-------------------------------------------------------------------------*/ 4744 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4745 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4746 4747 /* post the Irecv of j-structure */ 4748 /*-------------------------------*/ 4749 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4750 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4751 4752 /* post the Isend of j-structure */ 4753 /*--------------------------------*/ 4754 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4755 4756 for (proc=0, k=0; proc<size; proc++) { 4757 if (!len_s[proc]) continue; 4758 i = owners[proc]; 4759 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4760 k++; 4761 } 4762 4763 /* receives and sends of j-structure are complete */ 4764 /*------------------------------------------------*/ 4765 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4766 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4767 4768 /* send and recv i-structure */ 4769 /*---------------------------*/ 4770 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4771 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4772 4773 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4774 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4775 for (proc=0,k=0; proc<size; proc++) { 4776 if (!len_s[proc]) continue; 4777 /* form outgoing message for i-structure: 4778 buf_si[0]: nrows to be sent 4779 [1:nrows]: row index (global) 4780 [nrows+1:2*nrows+1]: i-structure index 4781 */ 4782 /*-------------------------------------------*/ 4783 nrows = len_si[proc]/2 - 1; 4784 buf_si_i = buf_si + nrows+1; 4785 buf_si[0] = nrows; 4786 buf_si_i[0] = 0; 4787 nrows = 0; 4788 for (i=owners[proc]; i<owners[proc+1]; i++) { 4789 anzi = ai[i+1] - ai[i]; 4790 if (anzi) { 4791 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4792 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4793 nrows++; 4794 } 4795 } 4796 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4797 k++; 4798 buf_si += len_si[proc]; 4799 } 4800 4801 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4802 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4803 4804 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4805 for (i=0; i<merge->nrecv; i++) { 4806 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4807 } 4808 4809 ierr = PetscFree(len_si);CHKERRQ(ierr); 4810 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4811 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4812 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4813 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4814 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4815 ierr = PetscFree(status);CHKERRQ(ierr); 4816 4817 /* compute a local seq matrix in each processor */ 4818 /*----------------------------------------------*/ 4819 /* allocate bi array and free space for accumulating nonzero column info */ 4820 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4821 bi[0] = 0; 4822 4823 /* create and initialize a linked list */ 4824 nlnk = N+1; 4825 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4826 4827 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4828 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4829 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4830 4831 current_space = free_space; 4832 4833 /* determine symbolic info for each local row */ 4834 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4835 4836 for (k=0; k<merge->nrecv; k++) { 4837 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4838 nrows = *buf_ri_k[k]; 4839 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4840 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4841 } 4842 4843 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4844 len = 0; 4845 for (i=0; i<m; i++) { 4846 bnzi = 0; 4847 /* add local non-zero cols of this proc's seqmat into lnk */ 4848 arow = owners[rank] + i; 4849 anzi = ai[arow+1] - ai[arow]; 4850 aj = a->j + ai[arow]; 4851 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4852 bnzi += nlnk; 4853 /* add received col data into lnk */ 4854 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4855 if (i == *nextrow[k]) { /* i-th row */ 4856 anzi = *(nextai[k]+1) - *nextai[k]; 4857 aj = buf_rj[k] + *nextai[k]; 4858 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4859 bnzi += nlnk; 4860 nextrow[k]++; nextai[k]++; 4861 } 4862 } 4863 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4864 4865 /* if free space is not available, make more free space */ 4866 if (current_space->local_remaining<bnzi) { 4867 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4868 nspacedouble++; 4869 } 4870 /* copy data into free space, then initialize lnk */ 4871 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4872 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4873 4874 current_space->array += bnzi; 4875 current_space->local_used += bnzi; 4876 current_space->local_remaining -= bnzi; 4877 4878 bi[i+1] = bi[i] + bnzi; 4879 } 4880 4881 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4882 4883 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4884 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4885 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4886 4887 /* create symbolic parallel matrix B_mpi */ 4888 /*---------------------------------------*/ 4889 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4890 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4891 if (n==PETSC_DECIDE) { 4892 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4893 } else { 4894 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4895 } 4896 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4897 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4898 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4899 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4900 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4901 4902 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4903 B_mpi->assembled = PETSC_FALSE; 4904 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4905 merge->bi = bi; 4906 merge->bj = bj; 4907 merge->buf_ri = buf_ri; 4908 merge->buf_rj = buf_rj; 4909 merge->coi = NULL; 4910 merge->coj = NULL; 4911 merge->owners_co = NULL; 4912 4913 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4914 4915 /* attach the 
supporting struct to B_mpi for reuse */ 4916 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4917 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4918 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4919 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4920 *mpimat = B_mpi; 4921 4922 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4923 PetscFunctionReturn(0); 4924 } 4925 4926 /*@C 4927 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4928 matrices from each processor 4929 4930 Collective on MPI_Comm 4931 4932 Input Parameters: 4933 + comm - the communicator the parallel matrix will live on 4934 . seqmat - the input sequential matrix (one per process) 4935 . m - number of local rows (or PETSC_DECIDE) 4936 . n - number of local columns (or PETSC_DECIDE) 4937 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4938 4939 Output Parameter: 4940 . mpimat - the parallel matrix generated 4941 4942 Level: advanced 4943 4944 Notes: 4945 The dimensions of the sequential matrix in each processor MUST be the same. 4946 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4947 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4948 @*/ 4949 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4950 { 4951 PetscErrorCode ierr; 4952 PetscMPIInt size; 4953 4954 PetscFunctionBegin; 4955 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4956 if (size == 1) { 4957 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4958 if (scall == MAT_INITIAL_MATRIX) { 4959 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4960 } else { 4961 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4962 } 4963 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4964 PetscFunctionReturn(0); 4965 } 4966 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4967 if (scall == MAT_INITIAL_MATRIX) { 4968 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4969 } 4970 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4971 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4972 PetscFunctionReturn(0); 4973 } 4974 4975 /*@ 4976 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4977 mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained 4978 with MatGetSize() 4979 4980 Not Collective 4981 4982 Input Parameters: 4983 + A - the matrix 4984 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4985 4986 Output Parameter: 4987 .
A_loc - the local sequential matrix generated 4988 4989 Level: developer 4990 4991 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4992 4993 @*/ 4994 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4995 { 4996 PetscErrorCode ierr; 4997 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4998 Mat_SeqAIJ *mat,*a,*b; 4999 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5000 MatScalar *aa,*ba,*cam; 5001 PetscScalar *ca; 5002 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5003 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5004 PetscBool match; 5005 MPI_Comm comm; 5006 PetscMPIInt size; 5007 5008 PetscFunctionBegin; 5009 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5010 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5011 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5012 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5013 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5014 5015 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5016 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5017 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5018 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5019 aa = a->a; ba = b->a; 5020 if (scall == MAT_INITIAL_MATRIX) { 5021 if (size == 1) { 5022 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5023 PetscFunctionReturn(0); 5024 } 5025 5026 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5027 ci[0] = 0; 5028 for (i=0; i<am; i++) { 5029 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5030 } 5031 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5032 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5033 k = 0; 5034 for (i=0; i<am; i++) { 5035 ncols_o = bi[i+1] - bi[i]; 5036 ncols_d = ai[i+1] - ai[i]; 5037 /* off-diagonal portion of A */ 5038 for (jo=0; jo<ncols_o; jo++) { 5039 col = cmap[*bj]; 5040 if (col >= cstart) break; 5041 cj[k] = col; bj++; 5042 ca[k++] = *ba++; 5043 } 5044 /* diagonal portion of A */ 5045 for (j=0; j<ncols_d; j++) { 5046 cj[k] = cstart + *aj++; 5047 ca[k++] = *aa++; 5048 } 5049 /* off-diagonal portion of A */ 5050 for (j=jo; j<ncols_o; j++) { 5051 cj[k] = cmap[*bj++]; 5052 ca[k++] = *ba++; 5053 } 5054 } 5055 /* put together the new matrix */ 5056 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5057 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5058 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5059 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5060 mat->free_a = PETSC_TRUE; 5061 mat->free_ij = PETSC_TRUE; 5062 mat->nonew = 0; 5063 } else if (scall == MAT_REUSE_MATRIX) { 5064 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5065 ci = mat->i; cj = mat->j; cam = mat->a; 5066 for (i=0; i<am; i++) { 5067 /* off-diagonal portion of A */ 5068 ncols_o = bi[i+1] - bi[i]; 5069 for (jo=0; jo<ncols_o; jo++) { 5070 col = cmap[*bj]; 5071 if (col >= cstart) break; 5072 *cam++ = *ba++; bj++; 5073 } 5074 /* diagonal portion of A */ 5075 ncols_d = ai[i+1] - ai[i]; 5076 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5077 /* off-diagonal portion of A */ 5078 for (j=jo; j<ncols_o; j++) { 5079 *cam++ = *ba++; bj++; 5080 } 5081 } 5082 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5083 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5084 PetscFunctionReturn(0); 5085 } 5086 5087 /*@C 5088 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5089 5090 Not Collective 5091 5092 Input Parameters: 5093 + A - the matrix 5094 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5095 - row, col - index sets of rows and columns to extract (or NULL) 5096 5097 Output Parameter: 5098 . A_loc - the local sequential matrix generated 5099 5100 Level: developer 5101 5102 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5103 5104 @*/ 5105 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5106 { 5107 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5108 PetscErrorCode ierr; 5109 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5110 IS isrowa,iscola; 5111 Mat *aloc; 5112 PetscBool match; 5113 5114 PetscFunctionBegin; 5115 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5116 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5117 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5118 if (!row) { 5119 start = A->rmap->rstart; end = A->rmap->rend; 5120 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5121 } else { 5122 isrowa = *row; 5123 } 5124 if (!col) { 5125 start = A->cmap->rstart; 5126 cmap = a->garray; 5127 nzA = a->A->cmap->n; 5128 nzB = a->B->cmap->n; 5129 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5130 ncols = 0; 5131 for (i=0; i<nzB; i++) { 5132 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5133 else break; 5134 } 5135 imark = i; 5136 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5137 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5138 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5139 } else { 5140 iscola = *col; 5141 } 5142 if (scall != MAT_INITIAL_MATRIX) { 5143 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5144 aloc[0] = *A_loc; 5145 } 5146 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5147 if (!col) { /* attach global id of condensed columns */ 5148 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5149 } 5150 *A_loc = aloc[0]; 5151 ierr = PetscFree(aloc);CHKERRQ(ierr); 5152 if (!row) { 5153 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5154 } 5155 if (!col) { 5156 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5157 } 5158 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5159 PetscFunctionReturn(0); 5160 } 5161 5162 /*@C 5163 MatGetBrowsOfAcols - Creates a 
SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5164 5165 Collective on Mat 5166 5167 Input Parameters: 5168 + A,B - the matrices in mpiaij format 5169 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5170 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5171 5172 Output Parameter: 5173 + rowb, colb - index sets of rows and columns of B to extract 5174 - B_seq - the sequential matrix generated 5175 5176 Level: developer 5177 5178 @*/ 5179 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5180 { 5181 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5182 PetscErrorCode ierr; 5183 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5184 IS isrowb,iscolb; 5185 Mat *bseq=NULL; 5186 5187 PetscFunctionBegin; 5188 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5189 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5190 } 5191 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5192 5193 if (scall == MAT_INITIAL_MATRIX) { 5194 start = A->cmap->rstart; 5195 cmap = a->garray; 5196 nzA = a->A->cmap->n; 5197 nzB = a->B->cmap->n; 5198 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5199 ncols = 0; 5200 for (i=0; i<nzB; i++) { /* row < local row index */ 5201 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5202 else break; 5203 } 5204 imark = i; 5205 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5206 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5207 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5208 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5209 } else { 5210 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5211 isrowb = *rowb; iscolb = *colb; 5212 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5213 bseq[0] = *B_seq; 5214 } 5215 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5216 *B_seq = bseq[0]; 5217 ierr = PetscFree(bseq);CHKERRQ(ierr); 5218 if (!rowb) { 5219 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5220 } else { 5221 *rowb = isrowb; 5222 } 5223 if (!colb) { 5224 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5225 } else { 5226 *colb = iscolb; 5227 } 5228 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5229 PetscFunctionReturn(0); 5230 } 5231 5232 #include <petsc/private/vecscatterimpl.h> 5233 /* 5234 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5235 of the OFF-DIAGONAL portion of local A 5236 5237 Collective on Mat 5238 5239 Input Parameters: 5240 + A,B - the matrices in mpiaij format 5241 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5242 5243 Output Parameter: 5244 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5245 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5246 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5247 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5248 5249 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5250 for this matrix. This is not desirable.. 
5251 5252 Level: developer 5253 5254 */ 5255 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5256 { 5257 VecScatter_MPI_General *gen_to,*gen_from; 5258 PetscErrorCode ierr; 5259 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5260 Mat_SeqAIJ *b_oth; 5261 VecScatter ctx; 5262 MPI_Comm comm; 5263 PetscMPIInt *rprocs,*sprocs,tag,rank; 5264 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5265 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5266 PetscScalar *b_otha,*bufa,*bufA,*vals; 5267 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5268 MPI_Request *rwaits = NULL,*swaits = NULL; 5269 MPI_Status *sstatus,rstatus; 5270 PetscMPIInt jj,size; 5271 VecScatterType type; 5272 PetscBool mpi1; 5273 5274 PetscFunctionBegin; 5275 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5276 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5277 5278 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5279 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5280 } 5281 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5282 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5283 5284 if (size == 1) { 5285 startsj_s = NULL; 5286 bufa_ptr = NULL; 5287 *B_oth = NULL; 5288 PetscFunctionReturn(0); 5289 } 5290 5291 ctx = a->Mvctx; 5292 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5293 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5294 if (!mpi1) { 5295 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5296 thus create a->Mvctx_mpi1 */ 5297 if (!a->Mvctx_mpi1) { 5298 a->Mvctx_mpi1_flg = PETSC_TRUE; 5299 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5300 } 5301 ctx = a->Mvctx_mpi1; 5302 } 5303 tag = ((PetscObject)ctx)->tag; 5304 5305 gen_to = (VecScatter_MPI_General*)ctx->todata; 5306 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5307 nrecvs = gen_from->n; 5308 nsends = gen_to->n; 5309 5310 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5311 srow = gen_to->indices; /* local row index to be sent */ 5312 sstarts = gen_to->starts; 5313 sprocs = gen_to->procs; 5314 sstatus = gen_to->sstatus; 5315 sbs = gen_to->bs; 5316 rstarts = gen_from->starts; 5317 rprocs = gen_from->procs; 5318 rbs = gen_from->bs; 5319 5320 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5321 if (scall == MAT_INITIAL_MATRIX) { 5322 /* i-array */ 5323 /*---------*/ 5324 /* post receives */ 5325 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5326 for (i=0; i<nrecvs; i++) { 5327 rowlen = rvalues + rstarts[i]*rbs; 5328 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5329 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5330 } 5331 5332 /* pack the outgoing message */ 5333 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5334 5335 sstartsj[0] = 0; 5336 rstartsj[0] = 0; 5337 len = 0; /* total length of j or a array to be sent */ 5338 k = 0; 5339 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5340 for (i=0; i<nsends; i++) { 5341 rowlen = svalues + sstarts[i]*sbs; 5342 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5343 for (j=0; j<nrows; j++) { 5344 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5345 for 
(l=0; l<sbs; l++) { 5346 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5347 5348 rowlen[j*sbs+l] = ncols; 5349 5350 len += ncols; 5351 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5352 } 5353 k++; 5354 } 5355 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5356 5357 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5358 } 5359 /* recvs and sends of i-array are completed */ 5360 i = nrecvs; 5361 while (i--) { 5362 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5363 } 5364 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5365 ierr = PetscFree(svalues);CHKERRQ(ierr); 5366 5367 /* allocate buffers for sending j and a arrays */ 5368 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5369 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5370 5371 /* create i-array of B_oth */ 5372 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5373 5374 b_othi[0] = 0; 5375 len = 0; /* total length of j or a array to be received */ 5376 k = 0; 5377 for (i=0; i<nrecvs; i++) { 5378 rowlen = rvalues + rstarts[i]*rbs; 5379 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5380 for (j=0; j<nrows; j++) { 5381 b_othi[k+1] = b_othi[k] + rowlen[j]; 5382 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5383 k++; 5384 } 5385 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5386 } 5387 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5388 5389 /* allocate space for j and a arrrays of B_oth */ 5390 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5391 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5392 5393 /* j-array */ 5394 /*---------*/ 5395 /* post receives of j-array */ 5396 for (i=0; i<nrecvs; i++) { 5397 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5398 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5399 } 5400 5401 /* pack the outgoing message j-array */ 5402 k = 0; 5403 for (i=0; i<nsends; i++) { 5404 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5405 bufJ = bufj+sstartsj[i]; 5406 for (j=0; j<nrows; j++) { 5407 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5408 for (ll=0; ll<sbs; ll++) { 5409 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5410 for (l=0; l<ncols; l++) { 5411 *bufJ++ = cols[l]; 5412 } 5413 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5414 } 5415 } 5416 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5417 } 5418 5419 /* recvs and sends of j-array are completed */ 5420 i = nrecvs; 5421 while (i--) { 5422 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5423 } 5424 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5425 } else if (scall == MAT_REUSE_MATRIX) { 5426 sstartsj = *startsj_s; 5427 rstartsj = *startsj_r; 5428 bufa = *bufa_ptr; 5429 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5430 b_otha = b_oth->a; 5431 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5432 5433 /* a-array */ 5434 /*---------*/ 5435 /* post receives of a-array */ 5436 for (i=0; i<nrecvs; i++) { 5437 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5438 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5439 } 5440 5441 /* pack the 
outgoing message a-array */ 5442 k = 0; 5443 for (i=0; i<nsends; i++) { 5444 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5445 bufA = bufa+sstartsj[i]; 5446 for (j=0; j<nrows; j++) { 5447 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5448 for (ll=0; ll<sbs; ll++) { 5449 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5450 for (l=0; l<ncols; l++) { 5451 *bufA++ = vals[l]; 5452 } 5453 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5454 } 5455 } 5456 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5457 } 5458 /* recvs and sends of a-array are completed */ 5459 i = nrecvs; 5460 while (i--) { 5461 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5462 } 5463 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5464 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5465 5466 if (scall == MAT_INITIAL_MATRIX) { 5467 /* put together the new matrix */ 5468 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5469 5470 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5471 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5472 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5473 b_oth->free_a = PETSC_TRUE; 5474 b_oth->free_ij = PETSC_TRUE; 5475 b_oth->nonew = 0; 5476 5477 ierr = PetscFree(bufj);CHKERRQ(ierr); 5478 if (!startsj_s || !bufa_ptr) { 5479 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5480 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5481 } else { 5482 *startsj_s = sstartsj; 5483 *startsj_r = rstartsj; 5484 *bufa_ptr = bufa; 5485 } 5486 } 5487 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5488 PetscFunctionReturn(0); 5489 } 5490 5491 /*@C 5492 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5493 5494 Not Collective 5495 5496 Input Parameters: 5497 . A - The matrix in mpiaij format 5498 5499 Output Parameter: 5500 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5501 . 
colmap - A map from global column index to local index into lvec 5502 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5503 5504 Level: developer 5505 5506 @*/ 5507 #if defined(PETSC_USE_CTABLE) 5508 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5509 #else 5510 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5511 #endif 5512 { 5513 Mat_MPIAIJ *a; 5514 5515 PetscFunctionBegin; 5516 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5517 PetscValidPointer(lvec, 2); 5518 PetscValidPointer(colmap, 3); 5519 PetscValidPointer(multScatter, 4); 5520 a = (Mat_MPIAIJ*) A->data; 5521 if (lvec) *lvec = a->lvec; 5522 if (colmap) *colmap = a->colmap; 5523 if (multScatter) *multScatter = a->Mvctx; 5524 PetscFunctionReturn(0); 5525 } 5526 5527 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5528 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5529 #if defined(PETSC_HAVE_MKL_SPARSE) 5530 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5531 #endif 5532 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5533 #if defined(PETSC_HAVE_ELEMENTAL) 5534 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5535 #endif 5536 #if defined(PETSC_HAVE_HYPRE) 5537 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5538 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5539 #endif 5540 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5541 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5542 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5543 5544 /* 5545 Computes (B'*A')' since computing B*A directly is untenable 5546 5547 n p p 5548 ( ) ( ) ( ) 5549 m ( A ) * n ( B ) = m ( C ) 5550 ( ) ( ) ( ) 5551 5552 */ 5553 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5554 { 5555 PetscErrorCode ierr; 5556 Mat At,Bt,Ct; 5557 5558 PetscFunctionBegin; 5559 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5560 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5561 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5562 ierr = MatDestroy(&At);CHKERRQ(ierr); 5563 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5564 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5565 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5566 PetscFunctionReturn(0); 5567 } 5568 5569 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5570 { 5571 PetscErrorCode ierr; 5572 PetscInt m=A->rmap->n,n=B->cmap->n; 5573 Mat Cmat; 5574 5575 PetscFunctionBegin; 5576 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5577 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5578 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5579 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5580 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5581 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5582 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5583 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5584 5585 Cmat->ops->matmultnumeric = 
MatMatMultNumeric_MPIDense_MPIAIJ; 5586 5587 *C = Cmat; 5588 PetscFunctionReturn(0); 5589 } 5590 5591 /* ----------------------------------------------------------------*/ 5592 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5593 { 5594 PetscErrorCode ierr; 5595 5596 PetscFunctionBegin; 5597 if (scall == MAT_INITIAL_MATRIX) { 5598 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5599 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5600 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5601 } 5602 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5603 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5604 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5605 PetscFunctionReturn(0); 5606 } 5607 5608 /*MC 5609 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5610 5611 Options Database Keys: 5612 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5613 5614 Level: beginner 5615 5616 .seealso: MatCreateAIJ() 5617 M*/ 5618 5619 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5620 { 5621 Mat_MPIAIJ *b; 5622 PetscErrorCode ierr; 5623 PetscMPIInt size; 5624 5625 PetscFunctionBegin; 5626 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5627 5628 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5629 B->data = (void*)b; 5630 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5631 B->assembled = PETSC_FALSE; 5632 B->insertmode = NOT_SET_VALUES; 5633 b->size = size; 5634 5635 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5636 5637 /* build cache for off array entries formed */ 5638 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5639 5640 b->donotstash = PETSC_FALSE; 5641 b->colmap = 0; 5642 b->garray = 0; 5643 b->roworiented = PETSC_TRUE; 5644 5645 /* stuff used for matrix vector multiply */ 5646 b->lvec = NULL; 5647 b->Mvctx = NULL; 5648 5649 /* stuff for MatGetRow() */ 5650 b->rowindices = 0; 5651 b->rowvalues = 0; 5652 b->getrowactive = PETSC_FALSE; 5653 5654 /* flexible pointer used in CUSP/CUSPARSE classes */ 5655 b->spptr = NULL; 5656 5657 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5658 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5659 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5660 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5661 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5662 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5664 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5665 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5666 #if defined(PETSC_HAVE_MKL_SPARSE) 5667 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5668 #endif 5669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5670 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5671 #if defined(PETSC_HAVE_ELEMENTAL) 5672 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5673 #endif 5674 #if defined(PETSC_HAVE_HYPRE) 5675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5676 #endif 5677 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5678 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5679 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5680 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5681 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5682 #if defined(PETSC_HAVE_HYPRE) 5683 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5684 #endif 5685 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5686 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5687 PetscFunctionReturn(0); 5688 } 5689 5690 /*@C 5691 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5692 and "off-diagonal" part of the matrix in CSR format. 5693 5694 Collective on MPI_Comm 5695 5696 Input Parameters: 5697 + comm - MPI communicator 5698 . m - number of local rows (Cannot be PETSC_DECIDE) 5699 . n - This value should be the same as the local size used in creating the 5700 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5701 calculated if N is given) For square matrices n is almost always m. 5702 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5703 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5704 . i - row indices for "diagonal" portion of matrix 5705 . j - column indices 5706 . a - matrix values 5707 . oi - row indices for "off-diagonal" portion of matrix 5708 . oj - column indices 5709 - oa - matrix values 5710 5711 Output Parameter: 5712 . mat - the matrix 5713 5714 Level: advanced 5715 5716 Notes: 5717 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5718 must free the arrays once the matrix has been destroyed and not before. 5719 5720 The i and j indices are 0 based 5721 5722 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5723 5724 This sets local rows and cannot be used to set off-processor values. 5725 5726 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5727 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5728 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 5729 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5730 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5731 communication if it is known that only local entries will be set. 5732 5733 .keywords: matrix, aij, compressed row, sparse, parallel 5734 5735 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5736 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5737 @*/ 5738 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5739 { 5740 PetscErrorCode ierr; 5741 Mat_MPIAIJ *maij; 5742 5743 PetscFunctionBegin; 5744 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5745 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5746 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5747 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5748 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5749 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5750 maij = (Mat_MPIAIJ*) (*mat)->data; 5751 5752 (*mat)->preallocated = PETSC_TRUE; 5753 5754 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5755 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5756 5757 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5758 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5759 5760 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5761 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5762 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5763 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5764 5765 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5766 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5767 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5768 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5769 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5770 PetscFunctionReturn(0); 5771 } 5772 5773 /* 5774 Special version for direct calls from Fortran 5775 */ 5776 #include <petsc/private/fortranimpl.h> 5777 5778 /* Change these macros so can be used in void function */ 5779 #undef CHKERRQ 5780 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5781 #undef SETERRQ2 5782 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5783 #undef SETERRQ3 5784 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5785 #undef SETERRQ 5786 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5787 5788 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5789 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5790 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5791 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5792 #else 5793 #endif 5794 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5795 { 5796 Mat mat = *mmat; 5797 PetscInt m = *mm, n = *mn; 5798 InsertMode addv = 
/*
    Special version of MatSetValues() for MPIAIJ matrices, for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Redefine these macros so they can be used in a void function: no error code can be
   returned, so errors abort instead */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macros */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up search state for the diagonal (1) and off-diagonal (2) blocks */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   b and ba must be refreshed before rp2/ap2 are recomputed because
                   MatDisAssemble_MPIAIJ() has replaced the off-diagonal matrix */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash the values; they are communicated during MatAssemblyBegin/End() */
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
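/*
   For reference only: a hedged sketch of the assembly style recommended in the notes of
   MatCreateMPIAIJWithSplitArrays() above, using MatSetValues() together with
   MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE).  The local sizes m and n, the
   preallocation counts, and the per-row column/value filling are placeholders, not part of
   any PETSc source.

     Mat         A;
     PetscInt    rstart,rend,row,ncols,cols[3];
     PetscScalar vals[3];

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,3,NULL,2,NULL,&A);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);   // only locally owned rows will be set
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       // fill ncols, cols[], vals[] for this row (application specific), then:
       ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/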