1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/isimpl.h> 6 #include <petscblaslapack.h> 7 #include <petscsf.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 48 { 49 PetscErrorCode ierr; 50 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 51 52 PetscFunctionBegin; 53 if (mat->A) { 54 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 55 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 56 } 57 PetscFunctionReturn(0); 58 } 59 60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 61 { 62 PetscErrorCode ierr; 63 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 64 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 65 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 66 const PetscInt *ia,*ib; 67 const MatScalar *aa,*bb; 68 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 69 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 70 71 PetscFunctionBegin; 72 *keptrows = 0; 73 ia = a->i; 74 ib = b->i; 75 for (i=0; i<m; i++) { 76 na = ia[i+1] - ia[i]; 77 nb = ib[i+1] - ib[i]; 78 if (!na && !nb) { 79 cnt++; 80 goto ok1; 81 } 82 aa = a->a + ia[i]; 83 for (j=0; j<na; j++) { 84 if (aa[j] != 0.0) goto ok1; 85 } 86 bb = b->a + ib[i]; 87 for (j=0; j <nb; j++) { 88 if (bb[j] != 0.0) goto ok1; 89 } 90 cnt++; 91 ok1:; 92 } 93 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 94 if (!n0rows) PetscFunctionReturn(0); 95 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 96 cnt = 0; 97 for (i=0; i<m; i++) { 98 na = ia[i+1] - ia[i]; 99 nb = ib[i+1] - ib[i]; 100 if (!na && !nb) continue; 101 aa = a->a + ia[i]; 102 for (j=0; j<na;j++) { 103 if (aa[j] != 0.0) { 104 rows[cnt++] = rstart + i; 105 goto ok2; 106 } 107 } 108 bb = b->a + ib[i]; 109 for (j=0; j<nb; j++) { 110 if (bb[j] != 0.0) { 111 rows[cnt++] = rstart + i; 112 goto ok2; 113 } 
114 } 115 ok2:; 116 } 117 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 122 { 123 PetscErrorCode ierr; 124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 125 PetscBool cong; 126 127 PetscFunctionBegin; 128 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 129 if (Y->assembled && cong) { 130 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 131 } else { 132 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 133 } 134 PetscFunctionReturn(0); 135 } 136 137 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 138 { 139 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 140 PetscErrorCode ierr; 141 PetscInt i,rstart,nrows,*rows; 142 143 PetscFunctionBegin; 144 *zrows = NULL; 145 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 146 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 147 for (i=0; i<nrows; i++) rows[i] += rstart; 148 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 153 { 154 PetscErrorCode ierr; 155 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 156 PetscInt i,n,*garray = aij->garray; 157 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 158 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 159 PetscReal *work; 160 161 PetscFunctionBegin; 162 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 163 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 164 if (type == NORM_2) { 165 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 166 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 167 } 168 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 169 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 170 } 171 } else if (type == NORM_1) { 172 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 173 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 174 } 175 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 176 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 177 } 178 } else if (type == NORM_INFINITY) { 179 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 180 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 181 } 182 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 183 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 184 } 185 186 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 187 if (type == NORM_INFINITY) { 188 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 189 } else { 190 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 191 } 192 ierr = PetscFree(work);CHKERRQ(ierr); 193 if (type == NORM_2) { 194 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 195 } 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 200 { 201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 202 IS sis,gis; 203 PetscErrorCode ierr; 204 const PetscInt *isis,*igis; 205 PetscInt n,*iis,nsis,ngis,rstart,i; 206 207 PetscFunctionBegin; 208 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 209 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 210 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 211 ierr = 
ISGetSize(sis,&nsis);CHKERRQ(ierr); 212 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 213 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 214 215 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 216 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 217 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 218 n = ngis + nsis; 219 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 220 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 221 for (i=0; i<n; i++) iis[i] += rstart; 222 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 223 224 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 225 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 226 ierr = ISDestroy(&sis);CHKERRQ(ierr); 227 ierr = ISDestroy(&gis);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 /* 232 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 233 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 234 235 Only for square matrices 236 237 Used by a preconditioner, hence PETSC_EXTERN 238 */ 239 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 240 { 241 PetscMPIInt rank,size; 242 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 243 PetscErrorCode ierr; 244 Mat mat; 245 Mat_SeqAIJ *gmata; 246 PetscMPIInt tag; 247 MPI_Status status; 248 PetscBool aij; 249 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 250 251 PetscFunctionBegin; 252 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 253 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 254 if (!rank) { 255 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 256 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 257 } 258 if (reuse == MAT_INITIAL_MATRIX) { 259 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 260 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 261 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 262 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 263 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 264 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 265 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 266 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 267 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 268 269 rowners[0] = 0; 270 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 271 rstart = rowners[rank]; 272 rend = rowners[rank+1]; 273 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 274 if (!rank) { 275 gmata = (Mat_SeqAIJ*) gmat->data; 276 /* send row lengths to all processors */ 277 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 278 for (i=1; i<size; i++) { 279 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 280 } 281 /* determine number diagonal and off-diagonal counts */ 282 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 283 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 284 jj = 0; 285 for (i=0; i<m; i++) { 286 for (j=0; j<dlens[i]; j++) { 287 if (gmata->j[jj] < rstart) ld[i]++; 288 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 289 jj++; 290 } 291 } 292 /* send column indices to other processes */ 293 for (i=1; i<size; i++) { 294 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 295 ierr = 
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 296 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 } 298 299 /* send numerical values to other processes */ 300 for (i=1; i<size; i++) { 301 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 302 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 303 } 304 gmataa = gmata->a; 305 gmataj = gmata->j; 306 307 } else { 308 /* receive row lengths */ 309 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* receive column indices */ 311 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 312 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 313 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 314 /* determine number diagonal and off-diagonal counts */ 315 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 316 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 317 jj = 0; 318 for (i=0; i<m; i++) { 319 for (j=0; j<dlens[i]; j++) { 320 if (gmataj[jj] < rstart) ld[i]++; 321 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 322 jj++; 323 } 324 } 325 /* receive numerical values */ 326 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 327 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 328 } 329 /* set preallocation */ 330 for (i=0; i<m; i++) { 331 dlens[i] -= olens[i]; 332 } 333 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 334 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 335 336 for (i=0; i<m; i++) { 337 dlens[i] += olens[i]; 338 } 339 cnt = 0; 340 for (i=0; i<m; i++) { 341 row = rstart + i; 342 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 343 cnt += dlens[i]; 344 } 345 if (rank) { 346 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 347 } 348 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 349 ierr = PetscFree(rowners);CHKERRQ(ierr); 350 351 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 352 353 *inmat = mat; 354 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 355 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 356 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 357 mat = *inmat; 358 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 359 if (!rank) { 360 /* send numerical values to other processes */ 361 gmata = (Mat_SeqAIJ*) gmat->data; 362 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 363 gmataa = gmata->a; 364 for (i=1; i<size; i++) { 365 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 366 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 367 } 368 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 369 } else { 370 /* receive numerical values from process 0*/ 371 nz = Ad->nz + Ao->nz; 372 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 373 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 374 } 375 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 376 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 377 ad = Ad->a; 378 ao = Ao->a; 379 if (mat->rmap->n) { 380 i = 0; 381 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 for (i=1; i<mat->rmap->n; i++) { 
385 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 386 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 387 } 388 i--; 389 if (mat->rmap->n) { 390 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 391 } 392 if (rank) { 393 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 394 } 395 } 396 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 398 PetscFunctionReturn(0); 399 } 400 401 /* 402 Local utility routine that creates a mapping from the global column 403 number to the local number in the off-diagonal part of the local 404 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 405 a slightly higher hash table cost; without it it is not scalable (each processor 406 has an order N integer array but is fast to acess. 407 */ 408 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 409 { 410 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 411 PetscErrorCode ierr; 412 PetscInt n = aij->B->cmap->n,i; 413 414 PetscFunctionBegin; 415 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 416 #if defined(PETSC_USE_CTABLE) 417 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 418 for (i=0; i<n; i++) { 419 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 420 } 421 #else 422 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 423 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 424 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 425 #endif 426 PetscFunctionReturn(0); 427 } 428 429 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 430 { \ 431 if (col <= lastcol1) low1 = 0; \ 432 else high1 = nrow1; \ 433 lastcol1 = col;\ 434 while (high1-low1 > 5) { \ 435 t = (low1+high1)/2; \ 436 if (rp1[t] > col) high1 = t; \ 437 else low1 = t; \ 438 } \ 439 for (_i=low1; _i<high1; _i++) { \ 440 if (rp1[_i] > col) break; \ 441 if (rp1[_i] == col) { \ 442 if (addv == ADD_VALUES) ap1[_i] += value; \ 443 else ap1[_i] = value; \ 444 goto a_noinsert; \ 445 } \ 446 } \ 447 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 448 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 449 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 450 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 451 N = nrow1++ - 1; a->nz++; high1++; \ 452 /* shift up all the later entries in this row */ \ 453 for (ii=N; ii>=_i; ii--) { \ 454 rp1[ii+1] = rp1[ii]; \ 455 ap1[ii+1] = ap1[ii]; \ 456 } \ 457 rp1[_i] = col; \ 458 ap1[_i] = value; \ 459 A->nonzerostate++;\ 460 a_noinsert: ; \ 461 ailen[row] = nrow1; \ 462 } 463 464 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 465 { \ 466 if (col <= lastcol2) low2 = 0; \ 467 else high2 = nrow2; \ 468 lastcol2 = col; \ 469 while (high2-low2 > 5) { \ 470 t = (low2+high2)/2; \ 471 if (rp2[t] > col) high2 = t; \ 472 else low2 = t; \ 473 } \ 474 for (_i=low2; _i<high2; _i++) { \ 475 if (rp2[_i] > col) break; \ 476 if (rp2[_i] == col) { \ 477 if (addv == ADD_VALUES) ap2[_i] += value; \ 478 else ap2[_i] 
= value; \ 479 goto b_noinsert; \ 480 } \ 481 } \ 482 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 484 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 485 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 486 N = nrow2++ - 1; b->nz++; high2++; \ 487 /* shift up all the later entries in this row */ \ 488 for (ii=N; ii>=_i; ii--) { \ 489 rp2[ii+1] = rp2[ii]; \ 490 ap2[ii+1] = ap2[ii]; \ 491 } \ 492 rp2[_i] = col; \ 493 ap2[_i] = value; \ 494 B->nonzerostate++; \ 495 b_noinsert: ; \ 496 bilen[row] = nrow2; \ 497 } 498 499 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 500 { 501 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 502 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 503 PetscErrorCode ierr; 504 PetscInt l,*garray = mat->garray,diag; 505 506 PetscFunctionBegin; 507 /* code only works for square matrices A */ 508 509 /* find size of row to the left of the diagonal part */ 510 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 511 row = row - diag; 512 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 513 if (garray[b->j[b->i[row]+l]] > diag) break; 514 } 515 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* diagonal part */ 518 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 519 520 /* right of diagonal part */ 521 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 522 PetscFunctionReturn(0); 523 } 524 525 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 526 { 527 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 528 PetscScalar value; 529 PetscErrorCode ierr; 530 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 531 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 532 PetscBool roworiented = aij->roworiented; 533 534 /* Some Variables required in the macro */ 535 Mat A = aij->A; 536 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 537 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 538 MatScalar *aa = a->a; 539 PetscBool ignorezeroentries = a->ignorezeroentries; 540 Mat B = aij->B; 541 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 542 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 543 MatScalar *ba = b->a; 544 545 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 546 PetscInt nonew; 547 MatScalar *ap1,*ap2; 548 549 PetscFunctionBegin; 550 for (i=0; i<m; i++) { 551 if (im[i] < 0) continue; 552 #if defined(PETSC_USE_DEBUG) 553 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 554 #endif 555 if (im[i] >= rstart && im[i] < rend) { 556 row = im[i] - rstart; 557 lastcol1 = -1; 558 rp1 = aj + ai[row]; 559 ap1 = aa + ai[row]; 560 rmax1 = aimax[row]; 561 nrow1 = ailen[row]; 562 low1 = 0; 563 high1 = nrow1; 564 lastcol2 = -1; 565 rp2 = bj + bi[row]; 566 ap2 = ba + bi[row]; 567 rmax2 = bimax[row]; 568 nrow2 = bilen[row]; 569 low2 = 0; 570 high2 = nrow2; 571 572 for (j=0; j<n; j++) { 573 if (roworiented) value = v[i*n+j]; 574 else value = v[i+j*m]; 
575 if (in[j] >= cstart && in[j] < cend) { 576 col = in[j] - cstart; 577 nonew = a->nonew; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 579 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 580 } else if (in[j] < 0) continue; 581 #if defined(PETSC_USE_DEBUG) 582 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 583 #endif 584 else { 585 if (mat->was_assembled) { 586 if (!aij->colmap) { 587 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 588 } 589 #if defined(PETSC_USE_CTABLE) 590 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 591 col--; 592 #else 593 col = aij->colmap[in[j]] - 1; 594 #endif 595 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 596 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 597 col = in[j]; 598 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 599 B = aij->B; 600 b = (Mat_SeqAIJ*)B->data; 601 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 602 rp2 = bj + bi[row]; 603 ap2 = ba + bi[row]; 604 rmax2 = bimax[row]; 605 nrow2 = bilen[row]; 606 low2 = 0; 607 high2 = nrow2; 608 bm = aij->B->rmap->n; 609 ba = b->a; 610 } else if (col < 0) { 611 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 612 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 613 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } 615 } else col = in[j]; 616 nonew = b->nonew; 617 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 618 } 619 } 620 } else { 621 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 622 if (!aij->donotstash) { 623 mat->assembled = PETSC_FALSE; 624 if (roworiented) { 625 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 626 } else { 627 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 628 } 629 } 630 } 631 } 632 PetscFunctionReturn(0); 633 } 634 635 PetscErrorCode MatSetValues_MPIAIJ_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscInt *dnz, const PetscInt *onz) 636 { 637 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 638 Mat A = aij->A; /* diagonal part of the matrix */ 639 Mat B = aij->B; /* offdiagonal part of the matrix */ 640 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 641 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 642 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 643 PetscInt *ailen = a->ilen,*aj = a->j; 644 PetscInt *bilen = b->ilen,*bj = b->j; 645 PetscInt am = aij->A->rmap->n,i,j,k; 646 PetscInt col, diag_so_far=0, offd_so_far=0; 647 648 PetscFunctionBegin; 649 /* Iterate over all rows of the matrix */ 650 for (j=0; j<am; j++) { 651 /* set the left off-diagonal */ 652 for (i=0; i<onz[j]; i++) { 653 col = i + mat_i[j]; 654 /* If column is in the diagonal... */ 655 if (mat_j[col] >= cstart && mat_j[col] < cend) { 656 break; 657 } 658 bj[i+offd_so_far] = mat_j[col]; 659 } 660 /* ... 
set the diagonal elements */ 661 for (k=0; k<dnz[j]; k++) { 662 col = k + mat_i[j] + i; 663 aj[diag_so_far+k] = mat_j[col] - cstart; 664 ailen[j] = dnz[j]; 665 } 666 diag_so_far += dnz[j]; 667 /* Set the right off-diagonal */ 668 for (i=i+dnz[j]; i<dnz[j]+onz[j]; i++) { 669 col = i + mat_i[j]; 670 bj[i-dnz[j]+offd_so_far] = mat_j[col]; 671 } 672 bilen[j] = onz[j]; 673 offd_so_far += onz[j]; 674 } 675 PetscFunctionReturn(0); 676 } 677 678 679 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 680 { 681 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 682 PetscErrorCode ierr; 683 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 684 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 685 686 PetscFunctionBegin; 687 for (i=0; i<m; i++) { 688 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 689 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 690 if (idxm[i] >= rstart && idxm[i] < rend) { 691 row = idxm[i] - rstart; 692 for (j=0; j<n; j++) { 693 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 694 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 695 if (idxn[j] >= cstart && idxn[j] < cend) { 696 col = idxn[j] - cstart; 697 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 698 } else { 699 if (!aij->colmap) { 700 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 701 } 702 #if defined(PETSC_USE_CTABLE) 703 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 704 col--; 705 #else 706 col = aij->colmap[idxn[j]] - 1; 707 #endif 708 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 709 else { 710 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 711 } 712 } 713 } 714 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 715 } 716 PetscFunctionReturn(0); 717 } 718 719 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 720 721 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 722 { 723 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 724 PetscErrorCode ierr; 725 PetscInt nstash,reallocs; 726 727 PetscFunctionBegin; 728 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 729 730 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 731 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 732 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 733 PetscFunctionReturn(0); 734 } 735 736 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 737 { 738 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 739 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 740 PetscErrorCode ierr; 741 PetscMPIInt n; 742 PetscInt i,j,rstart,ncols,flg; 743 PetscInt *row,*col; 744 PetscBool other_disassembled; 745 PetscScalar *val; 746 747 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 748 749 PetscFunctionBegin; 750 if (!aij->donotstash && !mat->nooffprocentries) { 751 while (1) { 752 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 753 if (!flg) break; 754 755 for (i=0; i<n; ) { 756 /* Now identify the consecutive vals 
belonging to the same row */ 757 for (j=i,rstart=row[j]; j<n; j++) { 758 if (row[j] != rstart) break; 759 } 760 if (j < n) ncols = j-i; 761 else ncols = n-i; 762 /* Now assemble all these values with a single function call */ 763 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 764 765 i = j; 766 } 767 } 768 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 769 } 770 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 771 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 772 773 /* determine if any processor has disassembled, if so we must 774 also disassemble ourselfs, in order that we may reassemble. */ 775 /* 776 if nonzero structure of submatrix B cannot change then we know that 777 no processor disassembled thus we can skip this stuff 778 */ 779 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 780 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 781 if (mat->was_assembled && !other_disassembled) { 782 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 783 } 784 } 785 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 786 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 787 } 788 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 789 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 790 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 791 792 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 793 794 aij->rowvalues = 0; 795 796 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 797 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 798 799 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 800 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 801 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 802 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 803 } 804 PetscFunctionReturn(0); 805 } 806 807 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 808 { 809 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 810 PetscErrorCode ierr; 811 812 PetscFunctionBegin; 813 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 814 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 815 PetscFunctionReturn(0); 816 } 817 818 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 819 { 820 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 821 PetscInt *lrows; 822 PetscInt r, len; 823 PetscBool cong; 824 PetscErrorCode ierr; 825 826 PetscFunctionBegin; 827 /* get locally owned rows */ 828 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 829 /* fix right hand side if needed */ 830 if (x && b) { 831 const PetscScalar *xx; 832 PetscScalar *bb; 833 834 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 835 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 836 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 837 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 838 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 839 } 840 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 841 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 842 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 843 if ((diag != 0.0) && cong) { 844 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 845 } else if (diag != 0.0) { 846 ierr = MatZeroRows(mat->A, 
len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 847 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 848 for (r = 0; r < len; ++r) { 849 const PetscInt row = lrows[r] + A->rmap->rstart; 850 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 851 } 852 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 853 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 854 } else { 855 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 856 } 857 ierr = PetscFree(lrows);CHKERRQ(ierr); 858 859 /* only change matrix nonzero state if pattern was allowed to be changed */ 860 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 861 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 862 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 863 } 864 PetscFunctionReturn(0); 865 } 866 867 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 868 { 869 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 870 PetscErrorCode ierr; 871 PetscMPIInt n = A->rmap->n; 872 PetscInt i,j,r,m,p = 0,len = 0; 873 PetscInt *lrows,*owners = A->rmap->range; 874 PetscSFNode *rrows; 875 PetscSF sf; 876 const PetscScalar *xx; 877 PetscScalar *bb,*mask; 878 Vec xmask,lmask; 879 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 880 const PetscInt *aj, *ii,*ridx; 881 PetscScalar *aa; 882 883 PetscFunctionBegin; 884 /* Create SF where leaves are input rows and roots are owned rows */ 885 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 886 for (r = 0; r < n; ++r) lrows[r] = -1; 887 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 888 for (r = 0; r < N; ++r) { 889 const PetscInt idx = rows[r]; 890 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 891 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 892 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 893 } 894 rrows[r].rank = p; 895 rrows[r].index = rows[r] - owners[p]; 896 } 897 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 898 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 899 /* Collect flags for rows to be zeroed */ 900 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 901 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 902 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 903 /* Compress and put in row numbers */ 904 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 905 /* zero diagonal part of matrix */ 906 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 907 /* handle off diagonal part of matrix */ 908 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 909 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 910 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 911 for (i=0; i<len; i++) bb[lrows[i]] = 1; 912 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 913 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 914 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 916 if (x) { 917 ierr 
= VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 918 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 919 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 920 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 921 } 922 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 923 /* remove zeroed rows of off diagonal matrix */ 924 ii = aij->i; 925 for (i=0; i<len; i++) { 926 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 927 } 928 /* loop over all elements of off process part of matrix zeroing removed columns*/ 929 if (aij->compressedrow.use) { 930 m = aij->compressedrow.nrows; 931 ii = aij->compressedrow.i; 932 ridx = aij->compressedrow.rindex; 933 for (i=0; i<m; i++) { 934 n = ii[i+1] - ii[i]; 935 aj = aij->j + ii[i]; 936 aa = aij->a + ii[i]; 937 938 for (j=0; j<n; j++) { 939 if (PetscAbsScalar(mask[*aj])) { 940 if (b) bb[*ridx] -= *aa*xx[*aj]; 941 *aa = 0.0; 942 } 943 aa++; 944 aj++; 945 } 946 ridx++; 947 } 948 } else { /* do not use compressed row format */ 949 m = l->B->rmap->n; 950 for (i=0; i<m; i++) { 951 n = ii[i+1] - ii[i]; 952 aj = aij->j + ii[i]; 953 aa = aij->a + ii[i]; 954 for (j=0; j<n; j++) { 955 if (PetscAbsScalar(mask[*aj])) { 956 if (b) bb[i] -= *aa*xx[*aj]; 957 *aa = 0.0; 958 } 959 aa++; 960 aj++; 961 } 962 } 963 } 964 if (x) { 965 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 966 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 967 } 968 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 969 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 970 ierr = PetscFree(lrows);CHKERRQ(ierr); 971 972 /* only change matrix nonzero state if pattern was allowed to be changed */ 973 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 974 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 975 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 976 } 977 PetscFunctionReturn(0); 978 } 979 980 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 981 { 982 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 983 PetscErrorCode ierr; 984 PetscInt nt; 985 VecScatter Mvctx = a->Mvctx; 986 987 PetscFunctionBegin; 988 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 989 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 990 991 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 993 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 994 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 995 PetscFunctionReturn(0); 996 } 997 998 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 999 { 1000 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1001 PetscErrorCode ierr; 1002 1003 PetscFunctionBegin; 1004 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1009 { 1010 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1011 PetscErrorCode ierr; 1012 VecScatter Mvctx = a->Mvctx; 1013 1014 PetscFunctionBegin; 1015 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1016 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1017 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1018 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1019 ierr = 
(*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1020 PetscFunctionReturn(0); 1021 } 1022 1023 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1024 { 1025 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1026 PetscErrorCode ierr; 1027 PetscBool merged; 1028 1029 PetscFunctionBegin; 1030 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1031 /* do nondiagonal part */ 1032 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1033 if (!merged) { 1034 /* send it on its way */ 1035 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1036 /* do local part */ 1037 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1038 /* receive remote parts: note this assumes the values are not actually */ 1039 /* added in yy until the next line, */ 1040 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1041 } else { 1042 /* do local part */ 1043 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1044 /* send it on its way */ 1045 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1046 /* values actually were received in the Begin() but we need to call this nop */ 1047 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1048 } 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1053 { 1054 MPI_Comm comm; 1055 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1056 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1057 IS Me,Notme; 1058 PetscErrorCode ierr; 1059 PetscInt M,N,first,last,*notme,i; 1060 PetscMPIInt size; 1061 1062 PetscFunctionBegin; 1063 /* Easy test: symmetric diagonal block */ 1064 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1065 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1066 if (!*f) PetscFunctionReturn(0); 1067 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1068 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1069 if (size == 1) PetscFunctionReturn(0); 1070 1071 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1072 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1073 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1074 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1075 for (i=0; i<first; i++) notme[i] = i; 1076 for (i=last; i<M; i++) notme[i-last+first] = i; 1077 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1078 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1079 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1080 Aoff = Aoffs[0]; 1081 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1082 Boff = Boffs[0]; 1083 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1084 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1085 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1086 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1087 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1088 ierr = PetscFree(notme);CHKERRQ(ierr); 1089 PetscFunctionReturn(0); 1090 } 1091 1092 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1093 { 1094 PetscErrorCode ierr; 1095 1096 PetscFunctionBegin; 1097 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1102 { 1103 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1104 PetscErrorCode ierr; 1105 1106 PetscFunctionBegin; 1107 /* do nondiagonal part */ 1108 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1109 /* send it on its way */ 1110 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1111 /* do local part */ 1112 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1113 /* receive remote parts */ 1114 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 /* 1119 This only works correctly for square matrices where the subblock A->A is the 1120 diagonal block 1121 */ 1122 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1123 { 1124 PetscErrorCode ierr; 1125 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1126 1127 PetscFunctionBegin; 1128 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1129 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1130 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1135 { 1136 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1137 PetscErrorCode ierr; 1138 1139 PetscFunctionBegin; 1140 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1141 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1146 { 1147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 #if defined(PETSC_USE_LOG) 1152 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1153 #endif 1154 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1155 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1156 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1157 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1158 #if defined(PETSC_USE_CTABLE) 1159 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1160 #else 1161 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1162 #endif 1163 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1164 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1165 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1166 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1167 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1168 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1169 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1170 1171 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1172 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1174 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1175 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1176 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1177 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1178 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1179 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1180 #if defined(PETSC_HAVE_ELEMENTAL) 1181 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1182 #endif 1183 #if defined(PETSC_HAVE_HYPRE) 1184 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1185 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1186 #endif 1187 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1188 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1189 PetscFunctionReturn(0); 1190 } 1191 1192 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1193 { 1194 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1195 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1196 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1197 PetscErrorCode ierr; 1198 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1199 int fd; 1200 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1201 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1202 PetscScalar *column_values; 1203 PetscInt message_count,flowcontrolcount; 1204 FILE *file; 1205 1206 PetscFunctionBegin; 1207 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1208 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1209 nz = A->nz + B->nz; 1210 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1211 if (!rank) { 1212 header[0] = MAT_FILE_CLASSID; 1213 header[1] = mat->rmap->N; 1214 header[2] = mat->cmap->N; 1215 1216 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1217 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1218 /* get largest number of rows any processor has */ 1219 rlen = mat->rmap->n; 1220 range = mat->rmap->range; 1221 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1222 } else { 1223 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1224 rlen = mat->rmap->n; 1225 } 1226 1227 /* 
load up the local row counts */ 1228 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1229 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1230 1231 /* store the row lengths to the file */ 1232 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1233 if (!rank) { 1234 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1235 for (i=1; i<size; i++) { 1236 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1237 rlen = range[i+1] - range[i]; 1238 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1239 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1240 } 1241 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1242 } else { 1243 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1244 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1245 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1246 } 1247 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1248 1249 /* load up the local column indices */ 1250 nzmax = nz; /* th processor needs space a largest processor needs */ 1251 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1252 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1253 cnt = 0; 1254 for (i=0; i<mat->rmap->n; i++) { 1255 for (j=B->i[i]; j<B->i[i+1]; j++) { 1256 if ((col = garray[B->j[j]]) > cstart) break; 1257 column_indices[cnt++] = col; 1258 } 1259 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1260 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1261 } 1262 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1263 1264 /* store the column indices to the file */ 1265 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1266 if (!rank) { 1267 MPI_Status status; 1268 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1269 for (i=1; i<size; i++) { 1270 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1271 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1272 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1273 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1274 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1275 } 1276 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1277 } else { 1278 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1279 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1280 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1281 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1282 } 1283 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1284 1285 /* load up the local column values */ 1286 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1287 cnt = 0; 1288 for (i=0; i<mat->rmap->n; i++) { 1289 for 
(j=B->i[i]; j<B->i[i+1]; j++) { 1290 if (garray[B->j[j]] > cstart) break; 1291 column_values[cnt++] = B->a[j]; 1292 } 1293 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1294 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1295 } 1296 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1297 1298 /* store the column values to the file */ 1299 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1300 if (!rank) { 1301 MPI_Status status; 1302 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1303 for (i=1; i<size; i++) { 1304 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1305 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1306 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1307 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1308 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1309 } 1310 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1311 } else { 1312 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1313 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1314 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1315 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1316 } 1317 ierr = PetscFree(column_values);CHKERRQ(ierr); 1318 1319 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1320 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1321 PetscFunctionReturn(0); 1322 } 1323 1324 #include <petscdraw.h> 1325 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1326 { 1327 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1328 PetscErrorCode ierr; 1329 PetscMPIInt rank = aij->rank,size = aij->size; 1330 PetscBool isdraw,iascii,isbinary; 1331 PetscViewer sviewer; 1332 PetscViewerFormat format; 1333 1334 PetscFunctionBegin; 1335 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1336 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1337 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1338 if (iascii) { 1339 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1340 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1341 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1342 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1343 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1344 for (i=0; i<(PetscInt)size; i++) { 1345 nmax = PetscMax(nmax,nz[i]); 1346 nmin = PetscMin(nmin,nz[i]); 1347 navg += nz[i]; 1348 } 1349 ierr = PetscFree(nz);CHKERRQ(ierr); 1350 navg = navg/size; 1351 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1352 PetscFunctionReturn(0); 1353 } 1354 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1355 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1356 MatInfo info; 1357 
PetscBool inodes; 1358 1359 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1360 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1361 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1362 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1363 if (!inodes) { 1364 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1365 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1366 } else { 1367 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1368 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1369 } 1370 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1371 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1372 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1373 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1374 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1375 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1376 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1377 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1380 PetscInt inodecount,inodelimit,*inodes; 1381 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1382 if (inodes) { 1383 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1384 } else { 1385 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1386 } 1387 PetscFunctionReturn(0); 1388 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1389 PetscFunctionReturn(0); 1390 } 1391 } else if (isbinary) { 1392 if (size == 1) { 1393 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1394 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1395 } else { 1396 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1397 } 1398 PetscFunctionReturn(0); 1399 } else if (isdraw) { 1400 PetscDraw draw; 1401 PetscBool isnull; 1402 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1403 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1404 if (isnull) PetscFunctionReturn(0); 1405 } 1406 1407 { 1408 /* assemble the entire matrix onto first processor. 
*/ 1409 Mat A; 1410 Mat_SeqAIJ *Aloc; 1411 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1412 MatScalar *a; 1413 1414 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1415 if (!rank) { 1416 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1417 } else { 1418 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1419 } 1420 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1421 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1422 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1423 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1424 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1425 1426 /* copy over the A part */ 1427 Aloc = (Mat_SeqAIJ*)aij->A->data; 1428 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1429 row = mat->rmap->rstart; 1430 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1431 for (i=0; i<m; i++) { 1432 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1433 row++; 1434 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1435 } 1436 aj = Aloc->j; 1437 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1438 1439 /* copy over the B part */ 1440 Aloc = (Mat_SeqAIJ*)aij->B->data; 1441 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1442 row = mat->rmap->rstart; 1443 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1444 ct = cols; 1445 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1446 for (i=0; i<m; i++) { 1447 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1448 row++; 1449 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1450 } 1451 ierr = PetscFree(ct);CHKERRQ(ierr); 1452 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1453 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1454 /* 1455 Everyone has to call to draw the matrix since the graphics waits are 1456 synchronized across all processors that share the PetscDraw object 1457 */ 1458 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1459 if (!rank) { 1460 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1461 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1462 } 1463 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1464 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1465 ierr = MatDestroy(&A);CHKERRQ(ierr); 1466 } 1467 PetscFunctionReturn(0); 1468 } 1469 1470 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1471 { 1472 PetscErrorCode ierr; 1473 PetscBool iascii,isdraw,issocket,isbinary; 1474 1475 PetscFunctionBegin; 1476 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1477 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1478 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1479 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1480 if (iascii || isdraw || isbinary || issocket) { 1481 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1482 } 1483 PetscFunctionReturn(0); 1484 } 1485 1486 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1487 { 1488 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1489 PetscErrorCode ierr; 1490 Vec bb1 = 0; 1491 PetscBool hasop; 1492 
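  /*
     Illustrative sketch (not compiled): MatSOR_MPIAIJ() below is normally reached through the
     PCSOR preconditioner, which applies processor-local sweeps; the coupling to off-process
     unknowns is folded into the right-hand side (bb1 = bb - B*x_ghost) before each local sweep.
     The helper name is arbitrary; A, b, x are assumed to be an assembled MATMPIAIJ matrix and
     conforming vectors, and petscksp.h would have to be included for the KSP/PC interface.
  */
#if 0
  static PetscErrorCode SolveWithLocalSOR(Mat A,Vec b,Vec x)
  {
    KSP            ksp;
    PC             pc;
    PetscErrorCode ierr;

    PetscFunctionBegin;
    ierr = KSPCreate(PetscObjectComm((PetscObject)A),&ksp);CHKERRQ(ierr);
    ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
    ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
    ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
    ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr); /* first branch of MatSOR_MPIAIJ() below */
    ierr = PCSORSetOmega(pc,1.0);CHKERRQ(ierr);                           /* omega = 1 gives Gauss-Seidel sweeps */
    ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
    ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
    ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
#endif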
1493 PetscFunctionBegin; 1494 if (flag == SOR_APPLY_UPPER) { 1495 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1496 PetscFunctionReturn(0); 1497 } 1498 1499 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1500 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1501 } 1502 1503 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1504 if (flag & SOR_ZERO_INITIAL_GUESS) { 1505 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1506 its--; 1507 } 1508 1509 while (its--) { 1510 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1511 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1512 1513 /* update rhs: bb1 = bb - B*x */ 1514 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1515 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1516 1517 /* local sweep */ 1518 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1519 } 1520 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1521 if (flag & SOR_ZERO_INITIAL_GUESS) { 1522 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1523 its--; 1524 } 1525 while (its--) { 1526 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1527 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1528 1529 /* update rhs: bb1 = bb - B*x */ 1530 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1531 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1532 1533 /* local sweep */ 1534 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1535 } 1536 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1537 if (flag & SOR_ZERO_INITIAL_GUESS) { 1538 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1539 its--; 1540 } 1541 while (its--) { 1542 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1543 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1544 1545 /* update rhs: bb1 = bb - B*x */ 1546 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1547 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1548 1549 /* local sweep */ 1550 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1551 } 1552 } else if (flag & SOR_EISENSTAT) { 1553 Vec xx1; 1554 1555 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1556 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1557 1558 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1559 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1560 if (!mat->diag) { 1561 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1562 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1563 } 1564 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1565 if (hasop) { 1566 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1567 } else { 1568 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1569 } 1570 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1571 1572 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1573 1574 /* local sweep */ 1575 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1576 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1577 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1578 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1579 1580 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1581 1582 matin->factorerrortype = mat->A->factorerrortype; 1583 PetscFunctionReturn(0); 1584 } 1585 1586 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1587 { 1588 Mat aA,aB,Aperm; 1589 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1590 PetscScalar *aa,*ba; 1591 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1592 PetscSF rowsf,sf; 1593 IS parcolp = NULL; 1594 PetscBool done; 1595 PetscErrorCode ierr; 1596 1597 PetscFunctionBegin; 1598 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1599 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1600 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1601 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1602 1603 /* Invert row permutation to find out where my rows should go */ 1604 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1605 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1606 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1607 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1608 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1609 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1610 1611 /* Invert column permutation to find out where my columns should go */ 1612 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1613 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1614 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1615 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1616 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1617 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1618 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1619 1620 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1621 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1622 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1623 1624 /* Find out where my gcols should go */ 1625 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1626 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1627 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1628 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1629 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1630 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1631 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1632 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1633 1634 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1635 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1636 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1637 for (i=0; i<m; i++) { 1638 PetscInt row = rdest[i],rowner; 1639 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1640 for (j=ai[i]; j<ai[i+1]; j++) { 1641 PetscInt cowner,col = cdest[aj[j]]; 1642 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to 
eliminate this search */ 1643 if (rowner == cowner) dnnz[i]++; 1644 else onnz[i]++; 1645 } 1646 for (j=bi[i]; j<bi[i+1]; j++) { 1647 PetscInt cowner,col = gcdest[bj[j]]; 1648 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1649 if (rowner == cowner) dnnz[i]++; 1650 else onnz[i]++; 1651 } 1652 } 1653 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1654 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1655 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1656 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1657 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1658 1659 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1660 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1661 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1662 for (i=0; i<m; i++) { 1663 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1664 PetscInt j0,rowlen; 1665 rowlen = ai[i+1] - ai[i]; 1666 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1667 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1668 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1669 } 1670 rowlen = bi[i+1] - bi[i]; 1671 for (j0=j=0; j<rowlen; j0=j) { 1672 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1673 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1674 } 1675 } 1676 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1677 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1678 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1679 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1680 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1681 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1682 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1683 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1684 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1685 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1686 *B = Aperm; 1687 PetscFunctionReturn(0); 1688 } 1689 1690 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1691 { 1692 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1693 PetscErrorCode ierr; 1694 1695 PetscFunctionBegin; 1696 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1697 if (ghosts) *ghosts = aij->garray; 1698 PetscFunctionReturn(0); 1699 } 1700 1701 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1702 { 1703 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1704 Mat A = mat->A,B = mat->B; 1705 PetscErrorCode ierr; 1706 PetscReal isend[5],irecv[5]; 1707 1708 PetscFunctionBegin; 1709 info->block_size = 1.0; 1710 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1711 1712 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1713 isend[3] = info->memory; isend[4] = info->mallocs; 1714 1715 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1716 1717 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1718 isend[3] += info->memory; isend[4] += info->mallocs; 1719 if (flag == MAT_LOCAL) { 1720 info->nz_used = isend[0]; 1721 info->nz_allocated = isend[1]; 1722 info->nz_unneeded = isend[2]; 1723 info->memory = isend[3]; 1724 info->mallocs = 
isend[4]; 1725 } else if (flag == MAT_GLOBAL_MAX) { 1726 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1727 1728 info->nz_used = irecv[0]; 1729 info->nz_allocated = irecv[1]; 1730 info->nz_unneeded = irecv[2]; 1731 info->memory = irecv[3]; 1732 info->mallocs = irecv[4]; 1733 } else if (flag == MAT_GLOBAL_SUM) { 1734 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1735 1736 info->nz_used = irecv[0]; 1737 info->nz_allocated = irecv[1]; 1738 info->nz_unneeded = irecv[2]; 1739 info->memory = irecv[3]; 1740 info->mallocs = irecv[4]; 1741 } 1742 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1743 info->fill_ratio_needed = 0; 1744 info->factor_mallocs = 0; 1745 PetscFunctionReturn(0); 1746 } 1747 1748 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1749 { 1750 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1751 PetscErrorCode ierr; 1752 1753 PetscFunctionBegin; 1754 switch (op) { 1755 case MAT_NEW_NONZERO_LOCATIONS: 1756 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1757 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1758 case MAT_KEEP_NONZERO_PATTERN: 1759 case MAT_NEW_NONZERO_LOCATION_ERR: 1760 case MAT_USE_INODES: 1761 case MAT_IGNORE_ZERO_ENTRIES: 1762 MatCheckPreallocated(A,1); 1763 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1764 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1765 break; 1766 case MAT_ROW_ORIENTED: 1767 MatCheckPreallocated(A,1); 1768 a->roworiented = flg; 1769 1770 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1771 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1772 break; 1773 case MAT_NEW_DIAGONALS: 1774 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1775 break; 1776 case MAT_IGNORE_OFF_PROC_ENTRIES: 1777 a->donotstash = flg; 1778 break; 1779 case MAT_SPD: 1780 A->spd_set = PETSC_TRUE; 1781 A->spd = flg; 1782 if (flg) { 1783 A->symmetric = PETSC_TRUE; 1784 A->structurally_symmetric = PETSC_TRUE; 1785 A->symmetric_set = PETSC_TRUE; 1786 A->structurally_symmetric_set = PETSC_TRUE; 1787 } 1788 break; 1789 case MAT_SYMMETRIC: 1790 MatCheckPreallocated(A,1); 1791 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1792 break; 1793 case MAT_STRUCTURALLY_SYMMETRIC: 1794 MatCheckPreallocated(A,1); 1795 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1796 break; 1797 case MAT_HERMITIAN: 1798 MatCheckPreallocated(A,1); 1799 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1800 break; 1801 case MAT_SYMMETRY_ETERNAL: 1802 MatCheckPreallocated(A,1); 1803 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1804 break; 1805 case MAT_SUBMAT_SINGLEIS: 1806 A->submat_singleis = flg; 1807 break; 1808 case MAT_STRUCTURE_ONLY: 1809 /* The option is handled directly by MatSetOption() */ 1810 break; 1811 default: 1812 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1813 } 1814 PetscFunctionReturn(0); 1815 } 1816 1817 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1818 { 1819 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1820 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1821 PetscErrorCode ierr; 1822 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1823 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1824 PetscInt *cmap,*idx_p; 1825 1826 PetscFunctionBegin; 1827 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1828 mat->getrowactive = PETSC_TRUE; 1829 1830 if (!mat->rowvalues && 
(idx || v)) { 1831 /* 1832 allocate enough space to hold information from the longest row. 1833 */ 1834 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1835 PetscInt max = 1,tmp; 1836 for (i=0; i<matin->rmap->n; i++) { 1837 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1838 if (max < tmp) max = tmp; 1839 } 1840 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1841 } 1842 1843 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1844 lrow = row - rstart; 1845 1846 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1847 if (!v) {pvA = 0; pvB = 0;} 1848 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1849 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1850 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1851 nztot = nzA + nzB; 1852 1853 cmap = mat->garray; 1854 if (v || idx) { 1855 if (nztot) { 1856 /* Sort by increasing column numbers, assuming A and B already sorted */ 1857 PetscInt imark = -1; 1858 if (v) { 1859 *v = v_p = mat->rowvalues; 1860 for (i=0; i<nzB; i++) { 1861 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1862 else break; 1863 } 1864 imark = i; 1865 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1866 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1867 } 1868 if (idx) { 1869 *idx = idx_p = mat->rowindices; 1870 if (imark > -1) { 1871 for (i=0; i<imark; i++) { 1872 idx_p[i] = cmap[cworkB[i]]; 1873 } 1874 } else { 1875 for (i=0; i<nzB; i++) { 1876 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1877 else break; 1878 } 1879 imark = i; 1880 } 1881 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1882 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1883 } 1884 } else { 1885 if (idx) *idx = 0; 1886 if (v) *v = 0; 1887 } 1888 } 1889 *nz = nztot; 1890 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1891 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1892 PetscFunctionReturn(0); 1893 } 1894 1895 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1896 { 1897 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1898 1899 PetscFunctionBegin; 1900 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1901 aij->getrowactive = PETSC_FALSE; 1902 PetscFunctionReturn(0); 1903 } 1904 1905 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1906 { 1907 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1908 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1909 PetscErrorCode ierr; 1910 PetscInt i,j,cstart = mat->cmap->rstart; 1911 PetscReal sum = 0.0; 1912 MatScalar *v; 1913 1914 PetscFunctionBegin; 1915 if (aij->size == 1) { 1916 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1917 } else { 1918 if (type == NORM_FROBENIUS) { 1919 v = amat->a; 1920 for (i=0; i<amat->nz; i++) { 1921 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1922 } 1923 v = bmat->a; 1924 for (i=0; i<bmat->nz; i++) { 1925 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1926 } 1927 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1928 *norm = PetscSqrtReal(*norm); 1929 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1930 } else if (type == NORM_1) { /* max column norm */ 1931 PetscReal *tmp,*tmp2; 1932 PetscInt *jj,*garray = aij->garray; 1933 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1934 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1935 *norm = 0.0; 1936 v = amat->a; jj = amat->j; 1937 for (j=0; j<amat->nz; j++) { 1938 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1939 } 1940 v = bmat->a; jj = bmat->j; 1941 for (j=0; j<bmat->nz; j++) { 1942 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1943 } 1944 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1945 for (j=0; j<mat->cmap->N; j++) { 1946 if (tmp2[j] > *norm) *norm = tmp2[j]; 1947 } 1948 ierr = PetscFree(tmp);CHKERRQ(ierr); 1949 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1950 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1951 } else if (type == NORM_INFINITY) { /* max row norm */ 1952 PetscReal ntemp = 0.0; 1953 for (j=0; j<aij->A->rmap->n; j++) { 1954 v = amat->a + amat->i[j]; 1955 sum = 0.0; 1956 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1957 sum += PetscAbsScalar(*v); v++; 1958 } 1959 v = bmat->a + bmat->i[j]; 1960 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1961 sum += PetscAbsScalar(*v); v++; 1962 } 1963 if (sum > ntemp) ntemp = sum; 1964 } 1965 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1966 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1967 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1968 } 1969 PetscFunctionReturn(0); 1970 } 1971 1972 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1973 { 1974 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1975 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1976 PetscErrorCode ierr; 1977 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1978 PetscInt cstart = A->cmap->rstart,ncol; 1979 Mat B; 1980 MatScalar *array; 1981 1982 PetscFunctionBegin; 1983 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1984 ai = Aloc->i; aj = Aloc->j; 1985 bi = Bloc->i; bj = Bloc->j; 1986 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1987 PetscInt *d_nnz,*g_nnz,*o_nnz; 1988 PetscSFNode *oloc; 1989 PETSC_UNUSED PetscSF sf; 1990 1991 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1992 /* compute d_nnz for preallocation */ 1993 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1994 for (i=0; i<ai[ma]; i++) { 1995 d_nnz[aj[i]]++; 1996 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1997 } 1998 /* compute local off-diagonal contributions */ 1999 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2000 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2001 /* map those to global */ 2002 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2003 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2004 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2005 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2006 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2007 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2008 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2009 2010 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2011 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2012 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2013 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2014 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2015 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2016 } else { 2017 B = *matout; 2018 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2019 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2020 } 2021 2022 /* copy over the A part */ 2023 array = Aloc->a; 2024 row = A->rmap->rstart; 2025 for (i=0; i<ma; i++) { 2026 ncol = ai[i+1]-ai[i]; 2027 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2028 row++; 2029 array += ncol; aj += ncol; 2030 } 2031 aj = Aloc->j; 2032 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2033 2034 /* copy over the B part */ 2035 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2036 array = Bloc->a; 2037 row = A->rmap->rstart; 2038 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2039 cols_tmp = cols; 2040 for (i=0; i<mb; i++) { 2041 ncol = bi[i+1]-bi[i]; 2042 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2043 row++; 2044 array += ncol; cols_tmp += ncol; 2045 } 2046 ierr = PetscFree(cols);CHKERRQ(ierr); 2047 2048 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2049 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2050 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2051 *matout = B; 2052 } else { 2053 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2054 } 2055 PetscFunctionReturn(0); 2056 } 2057 2058 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2059 { 2060 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2061 Mat a = aij->A,b = aij->B; 2062 PetscErrorCode ierr; 2063 PetscInt s1,s2,s3; 2064 2065 PetscFunctionBegin; 2066 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2067 if (rr) { 2068 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2069 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2070 /* Overlap communication with computation. 
*/ 2071 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2072 } 2073 if (ll) { 2074 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2075 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2076 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2077 } 2078 /* scale the diagonal block */ 2079 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2080 2081 if (rr) { 2082 /* Do a scatter end and then right scale the off-diagonal block */ 2083 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2084 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2085 } 2086 PetscFunctionReturn(0); 2087 } 2088 2089 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2090 { 2091 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2092 PetscErrorCode ierr; 2093 2094 PetscFunctionBegin; 2095 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2096 PetscFunctionReturn(0); 2097 } 2098 2099 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2100 { 2101 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2102 Mat a,b,c,d; 2103 PetscBool flg; 2104 PetscErrorCode ierr; 2105 2106 PetscFunctionBegin; 2107 a = matA->A; b = matA->B; 2108 c = matB->A; d = matB->B; 2109 2110 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2111 if (flg) { 2112 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2113 } 2114 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2115 PetscFunctionReturn(0); 2116 } 2117 2118 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2119 { 2120 PetscErrorCode ierr; 2121 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2122 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2123 2124 PetscFunctionBegin; 2125 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2126 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2127 /* because of the column compression in the off-processor part of the matrix a->B, 2128 the number of columns in a->B and b->B may be different, hence we cannot call 2129 the MatCopy() directly on the two parts. If need be, we can provide a more 2130 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2131 then copying the submatrices */ 2132 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2133 } else { 2134 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2135 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2136 } 2137 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2138 PetscFunctionReturn(0); 2139 } 2140 2141 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2142 { 2143 PetscErrorCode ierr; 2144 2145 PetscFunctionBegin; 2146 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2147 PetscFunctionReturn(0); 2148 } 2149 2150 /* 2151 Computes the number of nonzeros per row needed for preallocation when X and Y 2152 have different nonzero structure. 
2153 */ 2154 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2155 { 2156 PetscInt i,j,k,nzx,nzy; 2157 2158 PetscFunctionBegin; 2159 /* Set the number of nonzeros in the new matrix */ 2160 for (i=0; i<m; i++) { 2161 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2162 nzx = xi[i+1] - xi[i]; 2163 nzy = yi[i+1] - yi[i]; 2164 nnz[i] = 0; 2165 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2166 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2167 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2168 nnz[i]++; 2169 } 2170 for (; k<nzy; k++) nnz[i]++; 2171 } 2172 PetscFunctionReturn(0); 2173 } 2174 2175 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2176 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2177 { 2178 PetscErrorCode ierr; 2179 PetscInt m = Y->rmap->N; 2180 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2181 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2182 2183 PetscFunctionBegin; 2184 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2185 PetscFunctionReturn(0); 2186 } 2187 2188 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2189 { 2190 PetscErrorCode ierr; 2191 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2192 PetscBLASInt bnz,one=1; 2193 Mat_SeqAIJ *x,*y; 2194 2195 PetscFunctionBegin; 2196 if (str == SAME_NONZERO_PATTERN) { 2197 PetscScalar alpha = a; 2198 x = (Mat_SeqAIJ*)xx->A->data; 2199 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2200 y = (Mat_SeqAIJ*)yy->A->data; 2201 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2202 x = (Mat_SeqAIJ*)xx->B->data; 2203 y = (Mat_SeqAIJ*)yy->B->data; 2204 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2205 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2206 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2207 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2208 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2209 } else { 2210 Mat B; 2211 PetscInt *nnz_d,*nnz_o; 2212 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2213 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2214 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2215 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2216 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2217 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2218 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2219 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2220 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2221 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2222 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2223 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2224 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2225 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2226 } 2227 PetscFunctionReturn(0); 2228 } 2229 2230 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2231 2232 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2233 { 2234 #if defined(PETSC_USE_COMPLEX) 2235 PetscErrorCode ierr; 2236 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2237 2238 PetscFunctionBegin; 2239 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2240 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2241 #else 2242 PetscFunctionBegin; 2243 #endif 2244 PetscFunctionReturn(0); 2245 } 2246 2247 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2248 { 2249 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2250 PetscErrorCode ierr; 2251 2252 PetscFunctionBegin; 2253 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2254 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2255 PetscFunctionReturn(0); 2256 } 2257 2258 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2259 { 2260 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2265 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2266 PetscFunctionReturn(0); 2267 } 2268 2269 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2270 { 2271 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2272 PetscErrorCode ierr; 2273 PetscInt i,*idxb = 0; 2274 PetscScalar *va,*vb; 2275 Vec vtmp; 2276 2277 PetscFunctionBegin; 2278 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2279 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2280 if (idx) { 2281 for (i=0; i<A->rmap->n; i++) { 2282 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2283 } 2284 } 2285 2286 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2287 if (idx) { 2288 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2289 } 2290 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2291 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2292 2293 for (i=0; i<A->rmap->n; i++) { 2294 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2295 va[i] = vb[i]; 2296 if (idx) idx[i] = a->garray[idxb[i]]; 2297 } 2298 } 2299 2300 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2301 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2302 ierr = PetscFree(idxb);CHKERRQ(ierr); 2303 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2304 PetscFunctionReturn(0); 2305 } 2306 2307 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2308 { 2309 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2310 PetscErrorCode ierr; 2311 PetscInt i,*idxb = 0; 2312 PetscScalar *va,*vb; 2313 Vec vtmp; 2314 2315 PetscFunctionBegin; 2316 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2317 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2318 if (idx) { 2319 for (i=0; i<A->cmap->n; i++) { 2320 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2321 } 2322 } 2323 2324 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2325 if (idx) { 2326 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2327 } 2328 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2329 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2330 2331 for (i=0; i<A->rmap->n; i++) { 2332 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2333 va[i] = vb[i]; 2334 if (idx) idx[i] = a->garray[idxb[i]]; 2335 } 2336 } 2337 2338 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2339 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2340 ierr = PetscFree(idxb);CHKERRQ(ierr); 2341 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2342 PetscFunctionReturn(0); 2343 } 2344 2345 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2346 { 2347 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2348 PetscInt n = A->rmap->n; 2349 PetscInt cstart = A->cmap->rstart; 2350 PetscInt *cmap = mat->garray; 2351 PetscInt *diagIdx, *offdiagIdx; 2352 Vec diagV, offdiagV; 2353 PetscScalar *a, *diagA, *offdiagA; 2354 PetscInt r; 2355 PetscErrorCode ierr; 2356 2357 PetscFunctionBegin; 2358 
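  /*
     Illustrative sketch (not compiled): the three code paths of MatAXPY_MPIAIJ() above are
     selected by the MatStructure argument of MatAXPY().  The helper name is arbitrary; X and
     Y are assumed to be assembled MATMPIAIJ matrices of the same global size.
  */
#if 0
  static PetscErrorCode AxpyThreeWays(Mat Y,Mat X)
  {
    PetscErrorCode ierr;

    PetscFunctionBegin;
    /* same pattern: one BLAS axpy on the diagonal block and one on the off-diagonal block */
    ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    /* X's nonzeros are a subset of Y's: handled by MatAXPY_Basic() */
    ierr = MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);
    /* unrelated patterns: a new matrix is preallocated from the merged pattern and replaces Y */
    ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
#endif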
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
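  /*
     Illustrative sketch (not compiled): MatSetRandom() dispatches to MatSetRandom_MPIAIJ below,
     which randomizes the diagonal and off-diagonal blocks and re-assembles the matrix.  The
     helper name and the tridiagonal pattern are arbitrary; the nonzero pattern is assumed to be
     defined (and the matrix assembled) before the values are randomized.
  */
#if 0
  static PetscErrorCode RandomTridiagonal(MPI_Comm comm,PetscInt N,Mat *A)
  {
    PetscRandom    rctx;
    PetscInt       i,rstart,rend;
    PetscErrorCode ierr;

    PetscFunctionBegin;
    ierr = MatCreate(comm,A);CHKERRQ(ierr);
    ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
    ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,3,NULL,1,NULL);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,3,NULL);CHKERRQ(ierr);
    ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
    for (i=rstart; i<rend; i++) {   /* define a tridiagonal nonzero pattern */
      if (i > 0)   {ierr = MatSetValue(*A,i,i-1,1.0,INSERT_VALUES);CHKERRQ(ierr);}
      ierr = MatSetValue(*A,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
      if (i < N-1) {ierr = MatSetValue(*A,i,i+1,1.0,INSERT_VALUES);CHKERRQ(ierr);}
    }
    ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscRandomCreate(comm,&rctx);CHKERRQ(ierr);
    ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
    ierr = MatSetRandom(*A,rctx);CHKERRQ(ierr);      /* overwrite the values in the existing pattern */
    ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
#endif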
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2453 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2454 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2455 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2456 PetscFunctionReturn(0); 2457 } 2458 2459 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2460 { 2461 PetscFunctionBegin; 2462 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2463 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2464 PetscFunctionReturn(0); 2465 } 2466 2467 /*@ 2468 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2469 2470 Collective on Mat 2471 2472 Input Parameters: 2473 + A - the matrix 2474 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2475 2476 Level: advanced 2477 2478 @*/ 2479 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2480 { 2481 PetscErrorCode ierr; 2482 2483 PetscFunctionBegin; 2484 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2485 PetscFunctionReturn(0); 2486 } 2487 2488 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2489 { 2490 PetscErrorCode ierr; 2491 PetscBool sc = PETSC_FALSE,flg; 2492 2493 PetscFunctionBegin; 2494 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2495 ierr = PetscObjectOptionsBegin((PetscObject)A);CHKERRQ(ierr); 2496 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2497 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2498 if (flg) { 2499 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2500 } 2501 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2502 PetscFunctionReturn(0); 2503 } 2504 2505 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2506 { 2507 PetscErrorCode ierr; 2508 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2509 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2510 2511 PetscFunctionBegin; 2512 if (!Y->preallocated) { 2513 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2514 } else if (!aij->nz) { 2515 PetscInt nonew = aij->nonew; 2516 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2517 aij->nonew = nonew; 2518 } 2519 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2524 { 2525 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2526 PetscErrorCode ierr; 2527 2528 PetscFunctionBegin; 2529 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2530 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2531 if (d) { 2532 PetscInt rstart; 2533 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2534 *d += rstart; 2535 2536 } 2537 PetscFunctionReturn(0); 2538 } 2539 2540 2541 /* -------------------------------------------------------------------*/ 2542 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2543 MatGetRow_MPIAIJ, 2544 MatRestoreRow_MPIAIJ, 2545 MatMult_MPIAIJ, 2546 /* 4*/ MatMultAdd_MPIAIJ, 2547 MatMultTranspose_MPIAIJ, 2548 MatMultTransposeAdd_MPIAIJ, 2549 0, 2550 0, 2551 0, 2552 /*10*/ 0, 2553 0, 2554 0, 2555 MatSOR_MPIAIJ, 2556 MatTranspose_MPIAIJ, 2557 /*15*/ MatGetInfo_MPIAIJ, 2558 MatEqual_MPIAIJ, 2559 
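/*
   Illustrative sketch (not compiled): selecting the scalable overlap algorithm via
   MatMPIAIJSetUseScalableIncreaseOverlap() above before growing an index set, as overlapping
   domain-decomposition preconditioners do; the same switch is available from the options
   database as -mat_increase_overlap_scalable (see MatSetFromOptions_MPIAIJ() above).  The
   helper name is arbitrary; A is an assembled MATMPIAIJ matrix and is[] holds one index set
   of locally owned rows.
*/
#if 0
static PetscErrorCode GrowOverlapScalably(Mat A,IS is[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatIncreaseOverlap(A,1,is,2);CHKERRQ(ierr); /* grow the single index set by two levels */
  PetscFunctionReturn(0);
}
#endif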
MatGetDiagonal_MPIAIJ, 2560 MatDiagonalScale_MPIAIJ, 2561 MatNorm_MPIAIJ, 2562 /*20*/ MatAssemblyBegin_MPIAIJ, 2563 MatAssemblyEnd_MPIAIJ, 2564 MatSetOption_MPIAIJ, 2565 MatZeroEntries_MPIAIJ, 2566 /*24*/ MatZeroRows_MPIAIJ, 2567 0, 2568 0, 2569 0, 2570 0, 2571 /*29*/ MatSetUp_MPIAIJ, 2572 0, 2573 0, 2574 MatGetDiagonalBlock_MPIAIJ, 2575 0, 2576 /*34*/ MatDuplicate_MPIAIJ, 2577 0, 2578 0, 2579 0, 2580 0, 2581 /*39*/ MatAXPY_MPIAIJ, 2582 MatCreateSubMatrices_MPIAIJ, 2583 MatIncreaseOverlap_MPIAIJ, 2584 MatGetValues_MPIAIJ, 2585 MatCopy_MPIAIJ, 2586 /*44*/ MatGetRowMax_MPIAIJ, 2587 MatScale_MPIAIJ, 2588 MatShift_MPIAIJ, 2589 MatDiagonalSet_MPIAIJ, 2590 MatZeroRowsColumns_MPIAIJ, 2591 /*49*/ MatSetRandom_MPIAIJ, 2592 0, 2593 0, 2594 0, 2595 0, 2596 /*54*/ MatFDColoringCreate_MPIXAIJ, 2597 0, 2598 MatSetUnfactored_MPIAIJ, 2599 MatPermute_MPIAIJ, 2600 0, 2601 /*59*/ MatCreateSubMatrix_MPIAIJ, 2602 MatDestroy_MPIAIJ, 2603 MatView_MPIAIJ, 2604 0, 2605 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2606 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2607 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2608 0, 2609 0, 2610 0, 2611 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2612 MatGetRowMinAbs_MPIAIJ, 2613 0, 2614 0, 2615 0, 2616 0, 2617 /*75*/ MatFDColoringApply_AIJ, 2618 MatSetFromOptions_MPIAIJ, 2619 0, 2620 0, 2621 MatFindZeroDiagonals_MPIAIJ, 2622 /*80*/ 0, 2623 0, 2624 0, 2625 /*83*/ MatLoad_MPIAIJ, 2626 MatIsSymmetric_MPIAIJ, 2627 0, 2628 0, 2629 0, 2630 0, 2631 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2632 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2633 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2634 MatPtAP_MPIAIJ_MPIAIJ, 2635 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2636 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2637 0, 2638 0, 2639 0, 2640 0, 2641 /*99*/ 0, 2642 0, 2643 0, 2644 MatConjugate_MPIAIJ, 2645 0, 2646 /*104*/MatSetValuesRow_MPIAIJ, 2647 MatRealPart_MPIAIJ, 2648 MatImaginaryPart_MPIAIJ, 2649 0, 2650 0, 2651 /*109*/0, 2652 0, 2653 MatGetRowMin_MPIAIJ, 2654 0, 2655 MatMissingDiagonal_MPIAIJ, 2656 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2657 0, 2658 MatGetGhosts_MPIAIJ, 2659 0, 2660 0, 2661 /*119*/0, 2662 0, 2663 0, 2664 0, 2665 MatGetMultiProcBlock_MPIAIJ, 2666 /*124*/MatFindNonzeroRows_MPIAIJ, 2667 MatGetColumnNorms_MPIAIJ, 2668 MatInvertBlockDiagonal_MPIAIJ, 2669 0, 2670 MatCreateSubMatricesMPI_MPIAIJ, 2671 /*129*/0, 2672 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2673 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2674 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2675 0, 2676 /*134*/0, 2677 0, 2678 MatRARt_MPIAIJ_MPIAIJ, 2679 0, 2680 0, 2681 /*139*/MatSetBlockSizes_MPIAIJ, 2682 0, 2683 0, 2684 MatFDColoringSetUp_MPIXAIJ, 2685 MatFindOffBlockDiagonalEntries_MPIAIJ, 2686 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2687 }; 2688 2689 /* ----------------------------------------------------------------------------------------*/ 2690 2691 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2692 { 2693 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2694 PetscErrorCode ierr; 2695 2696 PetscFunctionBegin; 2697 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2698 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2699 PetscFunctionReturn(0); 2700 } 2701 2702 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2703 { 2704 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2705 PetscErrorCode ierr; 2706 2707 PetscFunctionBegin; 2708 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2709 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2710 PetscFunctionReturn(0); 2711 } 2712 2713 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt 
o_nnz[]) 2714 { 2715 Mat_MPIAIJ *b; 2716 PetscErrorCode ierr; 2717 2718 PetscFunctionBegin; 2719 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2720 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2721 b = (Mat_MPIAIJ*)B->data; 2722 2723 #if defined(PETSC_USE_CTABLE) 2724 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2725 #else 2726 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2727 #endif 2728 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2729 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2730 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2731 2732 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2733 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2734 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2735 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2736 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2737 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2738 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2739 2740 if (!B->preallocated) { 2741 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2742 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2743 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2744 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2745 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2746 } 2747 2748 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2749 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2750 B->preallocated = PETSC_TRUE; 2751 B->was_assembled = PETSC_FALSE; 2752 B->assembled = PETSC_FALSE;; 2753 PetscFunctionReturn(0); 2754 } 2755 2756 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2757 { 2758 Mat_MPIAIJ *b; 2759 PetscErrorCode ierr; 2760 2761 PetscFunctionBegin; 2762 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2763 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2764 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2765 b = (Mat_MPIAIJ*)B->data; 2766 2767 #if defined(PETSC_USE_CTABLE) 2768 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2769 #else 2770 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2771 #endif 2772 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2773 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2774 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2775 2776 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2777 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2778 B->preallocated = PETSC_TRUE; 2779 B->was_assembled = PETSC_FALSE; 2780 B->assembled = PETSC_FALSE; 2781 PetscFunctionReturn(0); 2782 } 2783 2784 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2785 { 2786 Mat mat; 2787 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2788 PetscErrorCode ierr; 2789 2790 PetscFunctionBegin; 2791 *newmat = 0; 2792 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2793 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2794 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2795 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2796 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2797 a = (Mat_MPIAIJ*)mat->data; 2798 2799 mat->factortype = matin->factortype; 2800 mat->assembled = PETSC_TRUE; 2801 mat->insertmode = NOT_SET_VALUES; 2802 mat->preallocated = PETSC_TRUE; 2803 2804 a->size = oldmat->size; 2805 a->rank = oldmat->rank; 2806 a->donotstash 
= oldmat->donotstash; 2807 a->roworiented = oldmat->roworiented; 2808 a->rowindices = 0; 2809 a->rowvalues = 0; 2810 a->getrowactive = PETSC_FALSE; 2811 2812 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2813 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2814 2815 if (oldmat->colmap) { 2816 #if defined(PETSC_USE_CTABLE) 2817 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2818 #else 2819 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2820 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2821 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2822 #endif 2823 } else a->colmap = 0; 2824 if (oldmat->garray) { 2825 PetscInt len; 2826 len = oldmat->B->cmap->n; 2827 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2828 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2829 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2830 } else a->garray = 0; 2831 2832 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2833 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2834 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2835 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2836 2837 if (oldmat->Mvctx_mpi1) { 2838 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2839 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2840 } 2841 2842 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2843 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2844 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2845 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2846 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2847 *newmat = mat; 2848 PetscFunctionReturn(0); 2849 } 2850 2851 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2852 { 2853 PetscScalar *vals,*svals; 2854 MPI_Comm comm; 2855 PetscErrorCode ierr; 2856 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2857 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2858 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2859 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2860 PetscInt cend,cstart,n,*rowners; 2861 int fd; 2862 PetscInt bs = newMat->rmap->bs; 2863 2864 PetscFunctionBegin; 2865 /* force binary viewer to load .info file if it has not yet done so */ 2866 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2867 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2868 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2869 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2870 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2871 if (!rank) { 2872 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2873 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2874 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2875 } 2876 2877 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2878 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the 
matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2879 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2880 if (bs < 0) bs = 1; 2881 2882 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2883 M = header[1]; N = header[2]; 2884 2885 /* If global sizes are set, check if they are consistent with that given in the file */ 2886 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2887 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2888 2889 /* determine ownership of all (block) rows */ 2890 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2891 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2892 else m = newMat->rmap->n; /* Set by user */ 2893 2894 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2895 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2896 2897 /* First process needs enough room for process with most rows */ 2898 if (!rank) { 2899 mmax = rowners[1]; 2900 for (i=2; i<=size; i++) { 2901 mmax = PetscMax(mmax, rowners[i]); 2902 } 2903 } else mmax = -1; /* unused, but compilers complain */ 2904 2905 rowners[0] = 0; 2906 for (i=2; i<=size; i++) { 2907 rowners[i] += rowners[i-1]; 2908 } 2909 rstart = rowners[rank]; 2910 rend = rowners[rank+1]; 2911 2912 /* distribute row lengths to all processors */ 2913 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2914 if (!rank) { 2915 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2916 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2917 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2918 for (j=0; j<m; j++) { 2919 procsnz[0] += ourlens[j]; 2920 } 2921 for (i=1; i<size; i++) { 2922 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2923 /* calculate the number of nonzeros on each processor */ 2924 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2925 procsnz[i] += rowlengths[j]; 2926 } 2927 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2928 } 2929 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2930 } else { 2931 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2932 } 2933 2934 if (!rank) { 2935 /* determine max buffer needed and allocate it */ 2936 maxnz = 0; 2937 for (i=0; i<size; i++) { 2938 maxnz = PetscMax(maxnz,procsnz[i]); 2939 } 2940 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2941 2942 /* read in my part of the matrix column indices */ 2943 nz = procsnz[0]; 2944 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2945 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2946 2947 /* read in every one elses and ship off */ 2948 for (i=1; i<size; i++) { 2949 nz = procsnz[i]; 2950 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2951 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2952 } 2953 ierr = PetscFree(cols);CHKERRQ(ierr); 2954 } else { 2955 /* determine buffer space needed for message */ 2956 nz = 0; 2957 for (i=0; i<m; i++) { 2958 nz += ourlens[i]; 2959 } 2960 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2961 2962 /* receive message of column indices*/ 2963 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2964 } 2965 2966 /* 
determine column ownership if matrix is not square */ 2967 if (N != M) { 2968 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2969 else n = newMat->cmap->n; 2970 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2971 cstart = cend - n; 2972 } else { 2973 cstart = rstart; 2974 cend = rend; 2975 n = cend - cstart; 2976 } 2977 2978 /* loop over local rows, determining number of off diagonal entries */ 2979 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2980 jj = 0; 2981 for (i=0; i<m; i++) { 2982 for (j=0; j<ourlens[i]; j++) { 2983 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2984 jj++; 2985 } 2986 } 2987 2988 for (i=0; i<m; i++) { 2989 ourlens[i] -= offlens[i]; 2990 } 2991 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2992 2993 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2994 2995 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2996 2997 for (i=0; i<m; i++) { 2998 ourlens[i] += offlens[i]; 2999 } 3000 3001 if (!rank) { 3002 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3003 3004 /* read in my part of the matrix numerical values */ 3005 nz = procsnz[0]; 3006 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3007 3008 /* insert into matrix */ 3009 jj = rstart; 3010 smycols = mycols; 3011 svals = vals; 3012 for (i=0; i<m; i++) { 3013 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3014 smycols += ourlens[i]; 3015 svals += ourlens[i]; 3016 jj++; 3017 } 3018 3019 /* read in other processors and ship out */ 3020 for (i=1; i<size; i++) { 3021 nz = procsnz[i]; 3022 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3023 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3024 } 3025 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3026 } else { 3027 /* receive numeric values */ 3028 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3029 3030 /* receive message of values*/ 3031 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3032 3033 /* insert into matrix */ 3034 jj = rstart; 3035 smycols = mycols; 3036 svals = vals; 3037 for (i=0; i<m; i++) { 3038 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3039 smycols += ourlens[i]; 3040 svals += ourlens[i]; 3041 jj++; 3042 } 3043 } 3044 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3045 ierr = PetscFree(vals);CHKERRQ(ierr); 3046 ierr = PetscFree(mycols);CHKERRQ(ierr); 3047 ierr = PetscFree(rowners);CHKERRQ(ierr); 3048 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3049 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3050 PetscFunctionReturn(0); 3051 } 3052 3053 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3054 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3055 { 3056 PetscErrorCode ierr; 3057 IS iscol_local; 3058 PetscBool isstride; 3059 PetscMPIInt lisstride=0,gisstride; 3060 3061 PetscFunctionBegin; 3062 /* check if we are grabbing all columns*/ 3063 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3064 3065 if (isstride) { 3066 PetscInt start,len,mstart,mlen; 3067 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3068 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3069 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3070 if (mstart == start && mlen-mstart == len) lisstride = 1; 3071 } 3072 3073 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3074 if (gisstride) { 3075 PetscInt N; 3076 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3077 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3078 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3079 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3080 } else { 3081 PetscInt cbs; 3082 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3083 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3084 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3085 } 3086 3087 *isseq = iscol_local; 3088 PetscFunctionReturn(0); 3089 } 3090 3091 /* 3092 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3093 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3094 3095 Input Parameters: 3096 mat - matrix 3097 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3098 i.e., mat->rstart <= isrow[i] < mat->rend 3099 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3100 i.e., mat->cstart <= iscol[i] < mat->cend 3101 Output Parameter: 3102 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3103 iscol_o - sequential column index set for retrieving mat->B 3104 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3105 */ 3106 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3107 { 3108 PetscErrorCode ierr; 3109 Vec x,cmap; 3110 const PetscInt *is_idx; 3111 PetscScalar *xarray,*cmaparray; 3112 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3113 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3114 Mat B=a->B; 3115 Vec lvec=a->lvec,lcmap; 3116 PetscInt i,cstart,cend,Bn=B->cmap->N; 3117 MPI_Comm comm; 3118 VecScatter Mvctx=a->Mvctx; 3119 3120 PetscFunctionBegin; 3121 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3122 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3123 3124 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3125 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3126 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3127 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3128 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3129 3130 /* Get start indices */ 3131 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3132 isstart -= ncols; 3133 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3134 3135 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3136 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3137 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3138 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3139 for (i=0; i<ncols; i++) { 3140 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3141 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3142 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3143 } 3144 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3145 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3146 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3147 3148 /* Get iscol_d */ 3149 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3150 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3151 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3152 3153 /* Get isrow_d */ 3154 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3155 rstart = mat->rmap->rstart; 3156 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3157 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3158 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3159 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3160 3161 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3162 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3163 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3164 3165 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3166 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3167 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3168 3169 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3170 3171 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3172 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3173 3174 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3175 /* off-process column indices */ 3176 count = 0; 3177 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3178 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3179 3180 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3181 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3182 for (i=0; i<Bn; i++) { 3183 if (PetscRealPart(xarray[i]) > -1.0) { 3184 idx[count] = i; /* local column index in off-diagonal part B */ 3185 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3186 count++; 3187 } 3188 } 3189 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3190 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3191 3192 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3193 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3194 3195 ierr = PetscFree(idx);CHKERRQ(ierr); 3196 *garray = cmap1; 3197 3198 ierr = VecDestroy(&x);CHKERRQ(ierr); 3199 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3200 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3201 PetscFunctionReturn(0); 3202 } 3203 3204 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3205 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3206 { 3207 PetscErrorCode ierr; 3208 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3209 Mat M = NULL; 3210 MPI_Comm comm; 3211 IS iscol_d,isrow_d,iscol_o; 3212 Mat Asub = NULL,Bsub = NULL; 3213 PetscInt n; 3214 3215 PetscFunctionBegin; 3216 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3217 3218 if (call == MAT_REUSE_MATRIX) { 3219 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3220 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3221 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3222 3223 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3224 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3225 3226 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3227 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3228 3229 /* Update diagonal and off-diagonal portions of submat */ 3230 asub = (Mat_MPIAIJ*)(*submat)->data; 3231 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3232 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3233 if (n) { 3234 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3235 } 3236 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3237 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3238 3239 } else { /* call == MAT_INITIAL_MATRIX) */ 3240 const PetscInt *garray; 3241 PetscInt BsubN; 3242 3243 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3244 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3245 3246 /* Create local submatrices Asub and Bsub */ 3247 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3248 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3249 3250 /* Create submatrix M */ 3251 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3252 3253 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3254 asub = (Mat_MPIAIJ*)M->data; 3255 3256 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3257 n = asub->B->cmap->N; 3258 if (BsubN > n) { 3259 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3260 const PetscInt *idx; 3261 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3262 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3263 3264 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3265 j = 0; 3266 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3267 for (i=0; i<n; i++) { 3268 if (j >= BsubN) break; 3269 while (subgarray[i] > garray[j]) j++; 3270 3271 if (subgarray[i] == garray[j]) { 3272 idx_new[i] = idx[j++]; 3273 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3274 } 3275 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3276 3277 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3278 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3279 3280 } else if (BsubN < n) { 3281 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3282 } 3283 3284 ierr = PetscFree(garray);CHKERRQ(ierr); 3285 *submat = M; 3286 3287 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3288 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3289 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3290 3291 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3292 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3293 3294 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3295 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3296 } 3297 PetscFunctionReturn(0); 3298 } 3299 3300 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3301 { 3302 PetscErrorCode ierr; 3303 IS iscol_local=NULL,isrow_d; 3304 PetscInt csize; 3305 PetscInt n,i,j,start,end; 3306 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3307 MPI_Comm comm; 3308 3309 PetscFunctionBegin; 3310 /* If isrow has same processor distribution as mat, 3311 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3312 if (call == MAT_REUSE_MATRIX) { 3313 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3314 if (isrow_d) { 3315 sameRowDist = PETSC_TRUE; 3316 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3317 } else { 3318 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3319 if (iscol_local) { 3320 sameRowDist = PETSC_TRUE; 3321 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3322 } 3323 } 3324 } else { 3325 /* Check if isrow has same processor distribution as mat */ 3326 sameDist[0] 
= PETSC_FALSE; 3327 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3328 if (!n) { 3329 sameDist[0] = PETSC_TRUE; 3330 } else { 3331 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3332 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3333 if (i >= start && j < end) { 3334 sameDist[0] = PETSC_TRUE; 3335 } 3336 } 3337 3338 /* Check if iscol has same processor distribution as mat */ 3339 sameDist[1] = PETSC_FALSE; 3340 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3341 if (!n) { 3342 sameDist[1] = PETSC_TRUE; 3343 } else { 3344 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3345 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3346 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3347 } 3348 3349 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3350 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3351 sameRowDist = tsameDist[0]; 3352 } 3353 3354 if (sameRowDist) { 3355 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3356 /* isrow and iscol have same processor distribution as mat */ 3357 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3358 PetscFunctionReturn(0); 3359 } else { /* sameRowDist */ 3360 /* isrow has same processor distribution as mat */ 3361 if (call == MAT_INITIAL_MATRIX) { 3362 PetscBool sorted; 3363 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3364 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3365 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3366 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3367 3368 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3369 if (sorted) { 3370 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3371 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3372 PetscFunctionReturn(0); 3373 } 3374 } else { /* call == MAT_REUSE_MATRIX */ 3375 IS iscol_sub; 3376 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3377 if (iscol_sub) { 3378 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3379 PetscFunctionReturn(0); 3380 } 3381 } 3382 } 3383 } 3384 3385 /* General case: iscol -> iscol_local which has global size of iscol */ 3386 if (call == MAT_REUSE_MATRIX) { 3387 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3388 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3389 } else { 3390 if (!iscol_local) { 3391 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3392 } 3393 } 3394 3395 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3396 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3397 3398 if (call == MAT_INITIAL_MATRIX) { 3399 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3400 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3401 } 3402 PetscFunctionReturn(0); 3403 } 3404 3405 /*@C 3406 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3407 and "off-diagonal" part of the matrix in CSR format. 3408 3409 Collective on MPI_Comm 3410 3411 Input Parameters: 3412 + comm - MPI communicator 3413 . 
A - "diagonal" portion of matrix 3414 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3415 - garray - global index of B columns 3416 3417 Output Parameter: 3418 . mat - the matrix, with input A as its local diagonal matrix 3419 Level: advanced 3420 3421 Notes: 3422 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3423 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3424 3425 .seealso: MatCreateMPIAIJWithSplitArrays() 3426 @*/ 3427 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3428 { 3429 PetscErrorCode ierr; 3430 Mat_MPIAIJ *maij; 3431 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3432 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3433 PetscScalar *oa=b->a; 3434 Mat Bnew; 3435 PetscInt m,n,N; 3436 3437 PetscFunctionBegin; 3438 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3439 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3440 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3441 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3442 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3443 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3444 3445 /* Get global columns of mat */ 3446 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3447 3448 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3449 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3450 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3451 maij = (Mat_MPIAIJ*)(*mat)->data; 3452 3453 (*mat)->preallocated = PETSC_TRUE; 3454 3455 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3456 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3457 3458 /* Set A as diagonal portion of *mat */ 3459 maij->A = A; 3460 3461 nz = oi[m]; 3462 for (i=0; i<nz; i++) { 3463 col = oj[i]; 3464 oj[i] = garray[col]; 3465 } 3466 3467 /* Set Bnew as off-diagonal portion of *mat */ 3468 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3469 bnew = (Mat_SeqAIJ*)Bnew->data; 3470 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3471 maij->B = Bnew; 3472 3473 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3474 3475 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3476 b->free_a = PETSC_FALSE; 3477 b->free_ij = PETSC_FALSE; 3478 ierr = MatDestroy(&B);CHKERRQ(ierr); 3479 3480 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3481 bnew->free_a = PETSC_TRUE; 3482 bnew->free_ij = PETSC_TRUE; 3483 3484 /* condense columns of maij->B */ 3485 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3486 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3487 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3488 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3489 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3490 PetscFunctionReturn(0); 3491 } 3492 3493 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3494 
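/*
   Illustrative sketch (not part of the API defined in this file): a typical caller-side use of
   MatCreateSubMatrix() on a MATMPIAIJ matrix A, which dispatches to MatCreateSubMatrix_MPIAIJ()
   below. When isrow and iscol match A's row and column ownership ranges, as in this sketch
   (which assumes a square A with conforming layouts and an existing PetscErrorCode ierr), the
   SameRowColDist branch below is taken; the variable names are hypothetical.

     Mat      S;
     IS       isrow,iscol;
     PetscInt rstart,rend,cstart,cend;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ... modify values of A without changing its nonzero pattern ...
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
     ierr = MatDestroy(&S);CHKERRQ(ierr);
*/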
3495 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3496 { 3497 PetscErrorCode ierr; 3498 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3499 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3500 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3501 Mat M,Msub,B=a->B; 3502 MatScalar *aa; 3503 Mat_SeqAIJ *aij; 3504 PetscInt *garray = a->garray,*colsub,Ncols; 3505 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3506 IS iscol_sub,iscmap; 3507 const PetscInt *is_idx,*cmap; 3508 PetscBool allcolumns=PETSC_FALSE; 3509 MPI_Comm comm; 3510 3511 PetscFunctionBegin; 3512 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3513 3514 if (call == MAT_REUSE_MATRIX) { 3515 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3516 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3517 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3518 3519 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3520 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3521 3522 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3523 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3524 3525 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3526 3527 } else { /* call == MAT_INITIAL_MATRIX) */ 3528 PetscBool flg; 3529 3530 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3531 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3532 3533 /* (1) iscol -> nonscalable iscol_local */ 3534 /* Check for special case: each processor gets entire matrix columns */ 3535 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3536 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3537 if (allcolumns) { 3538 iscol_sub = iscol_local; 3539 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3540 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3541 3542 } else { 3543 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3544 PetscInt *idx,*cmap1,k; 3545 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3546 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3547 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3548 count = 0; 3549 k = 0; 3550 for (i=0; i<Ncols; i++) { 3551 j = is_idx[i]; 3552 if (j >= cstart && j < cend) { 3553 /* diagonal part of mat */ 3554 idx[count] = j; 3555 cmap1[count++] = i; /* column index in submat */ 3556 } else if (Bn) { 3557 /* off-diagonal part of mat */ 3558 if (j == garray[k]) { 3559 idx[count] = j; 3560 cmap1[count++] = i; /* column index in submat */ 3561 } else if (j > garray[k]) { 3562 while (j > garray[k] && k < Bn-1) k++; 3563 if (j == garray[k]) { 3564 idx[count] = j; 3565 cmap1[count++] = i; /* column index in submat */ 3566 } 3567 } 3568 } 3569 } 3570 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3571 3572 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3573 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3574 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3575 3576 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3577 } 3578 3579 /* (3) Create sequential Msub */ 3580 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3581 } 3582 3583 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3584 aij = (Mat_SeqAIJ*)(Msub)->data; 3585 ii = aij->i; 3586 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3587 3588 /* 3589 m - number of local rows 3590 Ncols - number of columns (same on all processors) 3591 rstart - first row in new global matrix generated 3592 */ 3593 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3594 3595 if (call == MAT_INITIAL_MATRIX) { 3596 /* (4) Create parallel newmat */ 3597 PetscMPIInt rank,size; 3598 PetscInt csize; 3599 3600 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3601 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3602 3603 /* 3604 Determine the number of non-zeros in the diagonal and off-diagonal 3605 portions of the matrix in order to do correct preallocation 3606 */ 3607 3608 /* first get start and end of "diagonal" columns */ 3609 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3610 if (csize == PETSC_DECIDE) { 3611 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3612 if (mglobal == Ncols) { /* square matrix */ 3613 nlocal = m; 3614 } else { 3615 nlocal = Ncols/size + ((Ncols % size) > rank); 3616 } 3617 } else { 3618 nlocal = csize; 3619 } 3620 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3621 rstart = rend - nlocal; 3622 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3623 3624 /* next, compute all the lengths */ 3625 jj = aij->j; 3626 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3627 olens = dlens + m; 3628 for (i=0; i<m; i++) { 3629 jend = ii[i+1] - ii[i]; 3630 olen = 0; 3631 dlen = 0; 3632 for (j=0; j<jend; j++) { 3633 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3634 else dlen++; 3635 jj++; 3636 } 3637 olens[i] = olen; 3638 dlens[i] = dlen; 3639 } 3640 3641 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3642 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3643 3644 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3645 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
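/* the new parallel matrix M gets mat's type; the dlens/olens counts computed above provide its per-row diagonal/off-diagonal preallocation */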
3646 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3647 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3648 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3649 ierr = PetscFree(dlens);CHKERRQ(ierr); 3650 3651 } else { /* call == MAT_REUSE_MATRIX */ 3652 M = *newmat; 3653 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3654 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3655 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3656 /* 3657 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3658 rather than the slower MatSetValues(). 3659 */ 3660 M->was_assembled = PETSC_TRUE; 3661 M->assembled = PETSC_FALSE; 3662 } 3663 3664 /* (5) Set values of Msub to *newmat */ 3665 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3666 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3667 3668 jj = aij->j; 3669 aa = aij->a; 3670 for (i=0; i<m; i++) { 3671 row = rstart + i; 3672 nz = ii[i+1] - ii[i]; 3673 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3674 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3675 jj += nz; aa += nz; 3676 } 3677 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3678 3679 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3680 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3681 3682 ierr = PetscFree(colsub);CHKERRQ(ierr); 3683 3684 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3685 if (call == MAT_INITIAL_MATRIX) { 3686 *newmat = M; 3687 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3688 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3689 3690 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3691 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3692 3693 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3694 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3695 3696 if (iscol_local) { 3697 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3698 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3699 } 3700 } 3701 PetscFunctionReturn(0); 3702 } 3703 3704 /* 3705 Not great since it makes two copies of the submatrix, first an SeqAIJ 3706 in local and then by concatenating the local matrices the end result. 3707 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3708 3709 Note: This requires a sequential iscol with all indices. 
3710 */ 3711 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3712 { 3713 PetscErrorCode ierr; 3714 PetscMPIInt rank,size; 3715 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3716 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3717 Mat M,Mreuse; 3718 MatScalar *aa,*vwork; 3719 MPI_Comm comm; 3720 Mat_SeqAIJ *aij; 3721 PetscBool colflag,allcolumns=PETSC_FALSE; 3722 3723 PetscFunctionBegin; 3724 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3725 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3726 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3727 3728 /* Check for special case: each processor gets entire matrix columns */ 3729 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3730 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3731 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3732 3733 if (call == MAT_REUSE_MATRIX) { 3734 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3735 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3736 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3737 } else { 3738 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3739 } 3740 3741 /* 3742 m - number of local rows 3743 n - number of columns (same on all processors) 3744 rstart - first row in new global matrix generated 3745 */ 3746 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3747 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3748 if (call == MAT_INITIAL_MATRIX) { 3749 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3750 ii = aij->i; 3751 jj = aij->j; 3752 3753 /* 3754 Determine the number of non-zeros in the diagonal and off-diagonal 3755 portions of the matrix in order to do correct preallocation 3756 */ 3757 3758 /* first get start and end of "diagonal" columns */ 3759 if (csize == PETSC_DECIDE) { 3760 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3761 if (mglobal == n) { /* square matrix */ 3762 nlocal = m; 3763 } else { 3764 nlocal = n/size + ((n % size) > rank); 3765 } 3766 } else { 3767 nlocal = csize; 3768 } 3769 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3770 rstart = rend - nlocal; 3771 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3772 3773 /* next, compute all the lengths */ 3774 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3775 olens = dlens + m; 3776 for (i=0; i<m; i++) { 3777 jend = ii[i+1] - ii[i]; 3778 olen = 0; 3779 dlen = 0; 3780 for (j=0; j<jend; j++) { 3781 if (*jj < rstart || *jj >= rend) olen++; 3782 else dlen++; 3783 jj++; 3784 } 3785 olens[i] = olen; 3786 dlens[i] = dlen; 3787 } 3788 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3789 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3790 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3791 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3792 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3793 ierr = PetscFree(dlens);CHKERRQ(ierr); 3794 } else { 3795 PetscInt ml,nl; 3796 3797 M = *newmat; 3798 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3799 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3800 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3801 /* 3802 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3803 rather than the slower MatSetValues(). 3804 */ 3805 M->was_assembled = PETSC_TRUE; 3806 M->assembled = PETSC_FALSE; 3807 } 3808 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3809 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3810 ii = aij->i; 3811 jj = aij->j; 3812 aa = aij->a; 3813 for (i=0; i<m; i++) { 3814 row = rstart + i; 3815 nz = ii[i+1] - ii[i]; 3816 cwork = jj; jj += nz; 3817 vwork = aa; aa += nz; 3818 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3819 } 3820 3821 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3822 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3823 *newmat = M; 3824 3825 /* save submatrix used in processor for next request */ 3826 if (call == MAT_INITIAL_MATRIX) { 3827 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3828 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3829 } 3830 PetscFunctionReturn(0); 3831 } 3832 3833 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3834 { 3835 PetscInt m,cstart, cend,j,nnz,i,d; 3836 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3837 const PetscInt *JJ; 3838 PetscScalar *values; 3839 PetscErrorCode ierr; 3840 PetscBool nooffprocentries; 3841 3842 PetscFunctionBegin; 3843 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3844 3845 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3846 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3847 m = B->rmap->n; 3848 cstart = B->cmap->rstart; 3849 cend = B->cmap->rend; 3850 rstart = B->rmap->rstart; 3851 3852 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3853 3854 #if defined(PETSC_USE_DEBUG) 3855 for (i=0; i<m; i++) { 3856 nnz = Ii[i+1]- Ii[i]; 3857 JJ = J + Ii[i]; 3858 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3859 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3860 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3861 } 3862 #endif 3863 3864 for (i=0; i<m; i++) { 3865 nnz = Ii[i+1]- Ii[i]; 3866 JJ = J + Ii[i]; 3867 nnz_max = PetscMax(nnz_max,nnz); 3868 d = 0; 3869 for (j=0; j<nnz; j++) { 3870 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3871 } 3872 d_nnz[i] = d; 3873 o_nnz[i] = nnz - d; 3874 } 3875 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3876 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3877 3878 if (v) values = (PetscScalar*)v; 3879 else { 3880 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3881 } 3882 3883 for (i=0; i<m; i++) { 3884 ii = i + rstart; 3885 nnz = Ii[i+1]- Ii[i]; 3886 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3887 } 3888 nooffprocentries = B->nooffprocentries; 3889 B->nooffprocentries = PETSC_TRUE; 3890 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3891 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3892 B->nooffprocentries = nooffprocentries; 3893 3894 if (!v) { 3895 ierr = PetscFree(values);CHKERRQ(ierr); 3896 } 3897 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3898 PetscFunctionReturn(0); 3899 } 3900 3901 /*@ 3902 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3903 (the default parallel PETSc format). 3904 3905 Collective on MPI_Comm 3906 3907 Input Parameters: 3908 + B - the matrix 3909 . i - the indices into j for the start of each local row (starts with zero) 3910 . j - the column indices for each local row (starts with zero) 3911 - v - optional values in the matrix 3912 3913 Level: developer 3914 3915 Notes: 3916 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3917 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3918 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3919 3920 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3921 3922 The format which is used for the sparse matrix input, is equivalent to a 3923 row-major ordering.. i.e for the following matrix, the input data expected is 3924 as shown 3925 3926 $ 1 0 0 3927 $ 2 0 3 P0 3928 $ ------- 3929 $ 4 5 6 P1 3930 $ 3931 $ Process0 [P0]: rows_owned=[0,1] 3932 $ i = {0,1,3} [size = nrow+1 = 2+1] 3933 $ j = {0,0,2} [size = 3] 3934 $ v = {1,2,3} [size = 3] 3935 $ 3936 $ Process1 [P1]: rows_owned=[2] 3937 $ i = {0,3} [size = nrow+1 = 1+1] 3938 $ j = {0,1,2} [size = 3] 3939 $ v = {4,5,6} [size = 3] 3940 3941 .keywords: matrix, aij, compressed row, sparse, parallel 3942 3943 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3944 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3945 @*/ 3946 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3947 { 3948 PetscErrorCode ierr; 3949 3950 PetscFunctionBegin; 3951 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3952 PetscFunctionReturn(0); 3953 } 3954 3955 /*@C 3956 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3957 (the default parallel PETSc format). For good matrix assembly performance 3958 the user should preallocate the matrix storage by setting the parameters 3959 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3960 performance can be increased by more than a factor of 50. 3961 3962 Collective on MPI_Comm 3963 3964 Input Parameters: 3965 + B - the matrix 3966 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3967 (same value is used for all local rows) 3968 . d_nnz - array containing the number of nonzeros in the various rows of the 3969 DIAGONAL portion of the local submatrix (possibly different for each row) 3970 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3971 The size of this array is equal to the number of local rows, i.e 'm'. 
3972 For matrices that will be factored, you must leave room for (and set)
3973 the diagonal entry even if it is zero.
3974 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3975 submatrix (same value is used for all local rows).
3976 - o_nnz - array containing the number of nonzeros in the various rows of the
3977 OFF-DIAGONAL portion of the local submatrix (possibly different for
3978 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3979 structure. The size of this array is equal to the number
3980 of local rows, i.e. 'm'.
3981
3982 If the *_nnz parameter is given then the *_nz parameter is ignored.
3983
3984 The AIJ format (also called the Yale sparse matrix format or
3985 compressed row storage (CSR)) is fully compatible with standard Fortran 77
3986 storage. The stored row and column indices begin with zero.
3987 See Users-Manual: ch_mat for details.
3988
3989 The parallel matrix is partitioned such that the first m0 rows belong to
3990 process 0, the next m1 rows belong to process 1, the next m2 rows belong
3991 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3992
3993 The DIAGONAL portion of the local submatrix of a processor can be defined
3994 as the submatrix obtained by extracting the part corresponding to
3995 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3996 first row that belongs to the processor, r2 is the last row belonging to
3997 this processor, and c1-c2 is the range of indices of the local part of a
3998 vector suitable for applying the matrix to. This is an m x n matrix. In the
3999 common case of a square matrix, the row and column ranges are the same and
4000 the DIAGONAL part is also square. The remaining portion of the local
4001 submatrix (m x N) constitutes the OFF-DIAGONAL portion.
4002
4003 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4004
4005 You can call MatGetInfo() to get information on how effective the preallocation was;
4006 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4007 You can also run with the option -info and look for messages with the string
4008 malloc in them to see if additional memory allocation was needed.
4009
4010 Example usage:
4011
4012 Consider the following 8x8 matrix with 34 non-zero values, that is
4013 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4014 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4015 as follows:
4016
4017 .vb
4018 1 2 0 | 0 3 0 | 0 4
4019 Proc0 0 5 6 | 7 0 0 | 8 0
4020 9 0 10 | 11 0 0 | 12 0
4021 -------------------------------------
4022 13 0 14 | 15 16 17 | 0 0
4023 Proc1 0 18 0 | 19 20 21 | 0 0
4024 0 0 0 | 22 23 0 | 24 0
4025 -------------------------------------
4026 Proc2 25 26 27 | 0 0 28 | 29 0
4027 30 0 0 | 31 32 33 | 0 34
4028 .ve
4029
4030 This can be represented as a collection of submatrices as:
4031
4032 .vb
4033 A B C
4034 D E F
4035 G H I
4036 .ve
4037
4038 Where the submatrices A,B,C are owned by proc0, D,E,F are
4039 owned by proc1, G,H,I are owned by proc2.
4040
4041 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4042 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4043 The 'M','N' parameters are 8,8, and have the same values on all procs.
4044
4045 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4046 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4047 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4048 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4049 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
4050 matrix and [DF] as another SeqAIJ matrix.
4051
4052 When the d_nz, o_nz parameters are specified, d_nz storage elements are
4053 allocated for every row of the local diagonal submatrix, and o_nz
4054 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4055 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4056 the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4057 In this case, the values of d_nz,o_nz are:
4058 .vb
4059 proc0 : d_nz = 2, o_nz = 2
4060 proc1 : d_nz = 3, o_nz = 2
4061 proc2 : d_nz = 1, o_nz = 4
4062 .ve
4063 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4064 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4065 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4066 34 values.
4067
4068 When the d_nnz, o_nnz parameters are specified, the storage is specified
4069 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4070 In the above case the values for d_nnz,o_nnz are:
4071 .vb
4072 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4073 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4074 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4075 .ve
4076 Here the space allocated is the sum of all of the above values, i.e. 34, and
4077 hence preallocation is perfect.
4078
4079 Level: intermediate
4080
4081 .keywords: matrix, aij, compressed row, sparse, parallel
4082
4083 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4084 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4085 @*/
4086 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4087 {
4088 PetscErrorCode ierr;
4089
4090 PetscFunctionBegin;
4091 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4092 PetscValidType(B,1);
4093 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4094 PetscFunctionReturn(0);
4095 }
4096
4097 /*@
4098 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
4099 in standard CSR format.
4100
4101 Collective on MPI_Comm
4102
4103 Input Parameters:
4104 + comm - MPI communicator
4105 . m - number of local rows (cannot be PETSC_DECIDE)
4106 . n - This value should be the same as the local size used in creating the
4107 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4108 calculated if N is given) For square matrices n is almost always m.
4109 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4110 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4111 . i - row indices (CSR row offsets into j, of length m+1; i[0] must be 0)
4112 . j - column indices
4113 - a - matrix values
4114
4115 Output Parameter:
4116 . mat - the matrix
4117
4118 Level: intermediate
4119
4120 Notes:
4121 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4122 thus you CANNOT change the matrix entries by changing the values of a[] after you have
4123 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4124
4125 The i and j indices are 0 based, and the i offsets index into the local j array.
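
   As a concrete sketch of the call for the two-process example laid out below (the variable
   names, and the declarations of A and ierr, are illustrative only), process 0 would provide
   its two local rows as
.vb
      PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
      PetscScalar a[] = {1.0,2.0,3.0};
      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,2,3,3,i,j,a,&A);CHKERRQ(ierr);
.ve
   while process 1 would pass m = 1 together with its own i, j, and a arrays.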
4126 4127 The format which is used for the sparse matrix input, is equivalent to a 4128 row-major ordering.. i.e for the following matrix, the input data expected is 4129 as shown 4130 4131 $ 1 0 0 4132 $ 2 0 3 P0 4133 $ ------- 4134 $ 4 5 6 P1 4135 $ 4136 $ Process0 [P0]: rows_owned=[0,1] 4137 $ i = {0,1,3} [size = nrow+1 = 2+1] 4138 $ j = {0,0,2} [size = 3] 4139 $ v = {1,2,3} [size = 3] 4140 $ 4141 $ Process1 [P1]: rows_owned=[2] 4142 $ i = {0,3} [size = nrow+1 = 1+1] 4143 $ j = {0,1,2} [size = 3] 4144 $ v = {4,5,6} [size = 3] 4145 4146 .keywords: matrix, aij, compressed row, sparse, parallel 4147 4148 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4149 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4150 @*/ 4151 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4152 { 4153 PetscErrorCode ierr; 4154 4155 PetscFunctionBegin; 4156 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4157 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4158 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4159 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4160 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4161 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4162 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4163 PetscFunctionReturn(0); 4164 } 4165 4166 /*@C 4167 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4168 (the default parallel PETSc format). For good matrix assembly performance 4169 the user should preallocate the matrix storage by setting the parameters 4170 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4171 performance can be increased by more than a factor of 50. 4172 4173 Collective on MPI_Comm 4174 4175 Input Parameters: 4176 + comm - MPI communicator 4177 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4178 This value should be the same as the local size used in creating the 4179 y vector for the matrix-vector product y = Ax. 4180 . n - This value should be the same as the local size used in creating the 4181 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4182 calculated if N is given) For square matrices n is almost always m. 4183 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4184 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4185 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4186 (same value is used for all local rows) 4187 . d_nnz - array containing the number of nonzeros in the various rows of the 4188 DIAGONAL portion of the local submatrix (possibly different for each row) 4189 or NULL, if d_nz is used to specify the nonzero structure. 4190 The size of this array is equal to the number of local rows, i.e 'm'. 4191 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4192 submatrix (same value is used for all local rows). 4193 - o_nnz - array containing the number of nonzeros in the various rows of the 4194 OFF-DIAGONAL portion of the local submatrix (possibly different for 4195 each row) or NULL, if o_nz is used to specify the nonzero 4196 structure. 
The size of this array is equal to the number
4197 of local rows, i.e. 'm'.
4198
4199 Output Parameter:
4200 . A - the matrix
4201
4202 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4203 MatXXXXSetPreallocation() paradigm instead of this routine directly.
4204 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4205
4206 Notes:
4207 If the *_nnz parameter is given then the *_nz parameter is ignored.
4208
4209 The m,n,M,N parameters specify the size of the matrix, and its partitioning across
4210 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4211 storage requirements for this matrix.
4212
4213 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4214 processor then it must be used on all processors that share the object for
4215 that argument.
4216
4217 The user MUST specify either the local or global matrix dimensions
4218 (possibly both).
4219
4220 The parallel matrix is partitioned across processors such that the
4221 first m0 rows belong to process 0, the next m1 rows belong to
4222 process 1, the next m2 rows belong to process 2, etc., where
4223 m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4224 values corresponding to an [m x N] submatrix.
4225
4226 The columns are logically partitioned with the n0 columns belonging
4227 to the 0th partition, the next n1 columns belonging to the next
4228 partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4229
4230 The DIAGONAL portion of the local submatrix on any given processor
4231 is the submatrix corresponding to the rows and columns m,n
4232 corresponding to the given processor, i.e. the diagonal submatrix on
4233 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4234 etc. The remaining portion of the local submatrix [m x (N-n)]
4235 constitutes the OFF-DIAGONAL portion. The example below better
4236 illustrates this concept.
4237
4238 For a square global matrix we define each processor's diagonal portion
4239 to be its local rows and the corresponding columns (a square submatrix);
4240 each processor's off-diagonal portion encompasses the remainder of the
4241 local matrix (a rectangular submatrix).
4242
4243 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4244
4245 When calling this routine with a single process communicator, a matrix of
4246 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4247 type of communicator, use the construction mechanism
4248 .vb
4249 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4250 .ve
4251
4257 By default, this format uses inodes (identical nodes) when possible.
4258 We search for consecutive rows with the same nonzero structure, thereby
4259 reusing matrix information to achieve increased efficiency.
4260
4261 Options Database Keys:
4262 + -mat_no_inode - Do not use inodes
4263 - -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4264
4267 Example usage:
4268
4269 Consider the following 8x8 matrix with 34 non-zero values, that is
4270 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4271 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4272 as follows:
4273
4274 .vb
4275 1 2 0 | 0 3 0 | 0 4
4276 Proc0 0 5 6 | 7 0 0 | 8 0
4277 9 0 10 | 11 0 0 | 12 0
4278 -------------------------------------
4279 13 0 14 | 15 16 17 | 0 0
4280 Proc1 0 18 0 | 19 20 21 | 0 0
4281 0 0 0 | 22 23 0 | 24 0
4282 -------------------------------------
4283 Proc2 25 26 27 | 0 0 28 | 29 0
4284 30 0 0 | 31 32 33 | 0 34
4285 .ve
4286
4287 This can be represented as a collection of submatrices as
4288
4289 .vb
4290 A B C
4291 D E F
4292 G H I
4293 .ve
4294
4295 Where the submatrices A,B,C are owned by proc0, D,E,F are
4296 owned by proc1, G,H,I are owned by proc2.
4297
4298 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4299 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4300 The 'M','N' parameters are 8,8, and have the same values on all procs.
4301
4302 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4303 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4304 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4305 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4306 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
4307 matrix and [DF] as another SeqAIJ matrix.
4308
4309 When the d_nz, o_nz parameters are specified, d_nz storage elements are
4310 allocated for every row of the local diagonal submatrix, and o_nz
4311 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4312 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4313 the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4314 In this case, the values of d_nz,o_nz are
4315 .vb
4316 proc0 : d_nz = 2, o_nz = 2
4317 proc1 : d_nz = 3, o_nz = 2
4318 proc2 : d_nz = 1, o_nz = 4
4319 .ve
4320 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4321 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4322 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4323 34 values.
4324
4325 When the d_nnz, o_nnz parameters are specified, the storage is specified
4326 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4327 In the above case the values for d_nnz,o_nnz are
4328 .vb
4329 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4330 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4331 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4332 .ve
4333 Here the space allocated is the sum of all of the above values, i.e. 34, and
4334 hence preallocation is perfect.
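
   A minimal calling sketch for process 0 of the example above (illustrative only; the
   declarations of ierr and the value-setting loop are abbreviated, and the d_nnz/o_nnz
   values are the ones listed above):
.vb
      Mat      A;
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
      ... set entries with MatSetValues(), then ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve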
4335 4336 Level: intermediate 4337 4338 .keywords: matrix, aij, compressed row, sparse, parallel 4339 4340 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4341 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4342 @*/ 4343 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4344 { 4345 PetscErrorCode ierr; 4346 PetscMPIInt size; 4347 4348 PetscFunctionBegin; 4349 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4350 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4351 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4352 if (size > 1) { 4353 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4354 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4355 } else { 4356 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4357 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4358 } 4359 PetscFunctionReturn(0); 4360 } 4361 4362 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4363 { 4364 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4365 PetscBool flg; 4366 PetscErrorCode ierr; 4367 4368 PetscFunctionBegin; 4369 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4370 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4371 if (Ad) *Ad = a->A; 4372 if (Ao) *Ao = a->B; 4373 if (colmap) *colmap = a->garray; 4374 PetscFunctionReturn(0); 4375 } 4376 4377 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4378 { 4379 PetscErrorCode ierr; 4380 PetscInt m,N,i,rstart,nnz,Ii; 4381 PetscInt *indx; 4382 PetscScalar *values; 4383 4384 PetscFunctionBegin; 4385 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4386 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4387 PetscInt *dnz,*onz,sum,bs,cbs; 4388 4389 if (n == PETSC_DECIDE) { 4390 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4391 } 4392 /* Check sum(n) = N */ 4393 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4394 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4395 4396 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4397 rstart -= m; 4398 4399 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4400 for (i=0; i<m; i++) { 4401 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4402 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4403 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4404 } 4405 4406 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4407 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4408 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4409 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4410 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4411 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4412 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4413 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4414 } 4415 4416 /* numeric phase */ 4417 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4418 for (i=0; i<m; i++) { 4419 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4420 Ii = i + rstart; 4421 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
4422 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4423 } 4424 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4425 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4426 PetscFunctionReturn(0); 4427 } 4428 4429 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4430 { 4431 PetscErrorCode ierr; 4432 PetscMPIInt rank; 4433 PetscInt m,N,i,rstart,nnz; 4434 size_t len; 4435 const PetscInt *indx; 4436 PetscViewer out; 4437 char *name; 4438 Mat B; 4439 const PetscScalar *values; 4440 4441 PetscFunctionBegin; 4442 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4443 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4444 /* Should this be the type of the diagonal block of A? */ 4445 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4446 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4447 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4448 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4449 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4450 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4451 for (i=0; i<m; i++) { 4452 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4453 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4454 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4455 } 4456 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4457 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4458 4459 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4460 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4461 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4462 sprintf(name,"%s.%d",outfile,rank); 4463 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4464 ierr = PetscFree(name);CHKERRQ(ierr); 4465 ierr = MatView(B,out);CHKERRQ(ierr); 4466 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4467 ierr = MatDestroy(&B);CHKERRQ(ierr); 4468 PetscFunctionReturn(0); 4469 } 4470 4471 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4472 { 4473 PetscErrorCode ierr; 4474 Mat_Merge_SeqsToMPI *merge; 4475 PetscContainer container; 4476 4477 PetscFunctionBegin; 4478 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4479 if (container) { 4480 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4481 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4482 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4483 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4484 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4485 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4486 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4487 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4488 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4489 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4490 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4491 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4492 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4493 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4494 ierr = PetscFree(merge);CHKERRQ(ierr); 4495 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4496 } 4497 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4498 PetscFunctionReturn(0); 4499 } 4500 4501 #include <../src/mat/utils/freespace.h> 4502 #include <petscbt.h> 4503 4504 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4505 { 4506 PetscErrorCode ierr; 4507 MPI_Comm comm; 4508 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4509 PetscMPIInt 
size,rank,taga,*len_s; 4510 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4511 PetscInt proc,m; 4512 PetscInt **buf_ri,**buf_rj; 4513 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4514 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4515 MPI_Request *s_waits,*r_waits; 4516 MPI_Status *status; 4517 MatScalar *aa=a->a; 4518 MatScalar **abuf_r,*ba_i; 4519 Mat_Merge_SeqsToMPI *merge; 4520 PetscContainer container; 4521 4522 PetscFunctionBegin; 4523 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4524 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4525 4526 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4527 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4528 4529 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4530 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4531 4532 bi = merge->bi; 4533 bj = merge->bj; 4534 buf_ri = merge->buf_ri; 4535 buf_rj = merge->buf_rj; 4536 4537 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4538 owners = merge->rowmap->range; 4539 len_s = merge->len_s; 4540 4541 /* send and recv matrix values */ 4542 /*-----------------------------*/ 4543 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4544 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4545 4546 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4547 for (proc=0,k=0; proc<size; proc++) { 4548 if (!len_s[proc]) continue; 4549 i = owners[proc]; 4550 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4551 k++; 4552 } 4553 4554 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4555 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4556 ierr = PetscFree(status);CHKERRQ(ierr); 4557 4558 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4559 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4560 4561 /* insert mat values of mpimat */ 4562 /*----------------------------*/ 4563 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4564 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4565 4566 for (k=0; k<merge->nrecv; k++) { 4567 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4568 nrows = *(buf_ri_k[k]); 4569 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4570 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4571 } 4572 4573 /* set values of ba */ 4574 m = merge->rowmap->n; 4575 for (i=0; i<m; i++) { 4576 arow = owners[rank] + i; 4577 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4578 bnzi = bi[i+1] - bi[i]; 4579 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4580 4581 /* add local non-zero vals of this proc's seqmat into ba */ 4582 anzi = ai[arow+1] - ai[arow]; 4583 aj = a->j + ai[arow]; 4584 aa = a->a + ai[arow]; 4585 nextaj = 0; 4586 for (j=0; nextaj<anzi; j++) { 4587 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4588 ba_i[j] += aa[nextaj++]; 4589 } 4590 } 4591 4592 /* add received vals into ba */ 4593 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4594 /* i-th row */ 4595 if (i == *nextrow[k]) { 4596 anzi = *(nextai[k]+1) - *nextai[k]; 4597 aj = buf_rj[k] + *(nextai[k]); 4598 aa = abuf_r[k] + *(nextai[k]); 4599 nextaj = 0; 4600 for (j=0; nextaj<anzi; j++) { 4601 if (*(bj_i + j) == aj[nextaj]) { /* bcol 
== acol */ 4602 ba_i[j] += aa[nextaj++]; 4603 } 4604 } 4605 nextrow[k]++; nextai[k]++; 4606 } 4607 } 4608 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4609 } 4610 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4611 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4612 4613 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4614 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4615 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4616 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4617 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4618 PetscFunctionReturn(0); 4619 } 4620 4621 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4622 { 4623 PetscErrorCode ierr; 4624 Mat B_mpi; 4625 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4626 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4627 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4628 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4629 PetscInt len,proc,*dnz,*onz,bs,cbs; 4630 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4631 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4632 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4633 MPI_Status *status; 4634 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4635 PetscBT lnkbt; 4636 Mat_Merge_SeqsToMPI *merge; 4637 PetscContainer container; 4638 4639 PetscFunctionBegin; 4640 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4641 4642 /* make sure it is a PETSc comm */ 4643 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4644 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4645 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4646 4647 ierr = PetscNew(&merge);CHKERRQ(ierr); 4648 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4649 4650 /* determine row ownership */ 4651 /*---------------------------------------------------------*/ 4652 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4653 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4654 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4655 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4656 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4657 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4658 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4659 4660 m = merge->rowmap->n; 4661 owners = merge->rowmap->range; 4662 4663 /* determine the number of messages to send, their lengths */ 4664 /*---------------------------------------------------------*/ 4665 len_s = merge->len_s; 4666 4667 len = 0; /* length of buf_si[] */ 4668 merge->nsend = 0; 4669 for (proc=0; proc<size; proc++) { 4670 len_si[proc] = 0; 4671 if (proc == rank) { 4672 len_s[proc] = 0; 4673 } else { 4674 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4675 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4676 } 4677 if (len_s[proc]) { 4678 merge->nsend++; 4679 nrows = 0; 4680 for (i=owners[proc]; i<owners[proc+1]; i++) { 4681 if (ai[i+1] > ai[i]) nrows++; 4682 } 4683 len_si[proc] = 2*(nrows+1); 4684 len += len_si[proc]; 4685 } 4686 } 4687 4688 /* determine the number and length of messages to receive for ij-structure */ 4689 /*-------------------------------------------------------------------------*/ 4690 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4691 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4692 4693 /* post the Irecv of j-structure */ 4694 /*-------------------------------*/ 4695 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4696 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4697 4698 /* post the Isend of j-structure */ 4699 /*--------------------------------*/ 4700 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4701 4702 for (proc=0, k=0; proc<size; proc++) { 4703 if (!len_s[proc]) continue; 4704 i = owners[proc]; 4705 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4706 k++; 4707 } 4708 4709 /* receives and sends of j-structure are complete */ 4710 /*------------------------------------------------*/ 4711 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4712 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4713 4714 /* send and recv i-structure */ 4715 /*---------------------------*/ 4716 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4717 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4718 4719 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4720 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4721 for (proc=0,k=0; proc<size; proc++) { 4722 if (!len_s[proc]) continue; 4723 /* form outgoing message for i-structure: 4724 buf_si[0]: nrows to be sent 4725 [1:nrows]: row index (global) 4726 [nrows+1:2*nrows+1]: i-structure index 4727 */ 4728 /*-------------------------------------------*/ 4729 nrows = len_si[proc]/2 - 1; 4730 buf_si_i = buf_si + nrows+1; 4731 buf_si[0] = nrows; 4732 buf_si_i[0] = 0; 4733 nrows = 0; 4734 for (i=owners[proc]; i<owners[proc+1]; i++) { 4735 anzi = ai[i+1] - ai[i]; 4736 if (anzi) { 4737 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4738 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4739 nrows++; 4740 } 4741 } 4742 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4743 k++; 4744 buf_si += len_si[proc]; 4745 } 4746 4747 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4748 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4749 4750 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4751 for (i=0; i<merge->nrecv; i++) { 4752 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4753 } 4754 4755 ierr = PetscFree(len_si);CHKERRQ(ierr); 4756 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4757 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4758 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4759 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4760 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4761 ierr = PetscFree(status);CHKERRQ(ierr); 4762 4763 /* compute a local seq matrix in each processor */ 4764 /*----------------------------------------------*/ 4765 /* allocate bi array and free space for accumulating nonzero column info */ 4766 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4767 bi[0] = 0; 4768 4769 /* create and initialize a linked list */ 4770 nlnk = N+1; 4771 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4772 4773 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4774 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4775 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4776 4777 current_space = free_space; 4778 4779 /* determine symbolic info for each local row */ 4780 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4781 4782 for (k=0; k<merge->nrecv; k++) { 4783 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4784 nrows = *buf_ri_k[k]; 4785 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4786 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4787 } 4788 4789 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4790 len = 0; 4791 for (i=0; i<m; i++) { 4792 bnzi = 0; 4793 /* add local non-zero cols of this proc's seqmat into lnk */ 4794 arow = owners[rank] + i; 4795 anzi = ai[arow+1] - ai[arow]; 4796 aj = a->j + ai[arow]; 4797 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4798 bnzi += nlnk; 4799 /* add received col data into lnk */ 4800 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4801 if (i == *nextrow[k]) { /* i-th row */ 4802 anzi = *(nextai[k]+1) - *nextai[k]; 4803 aj = buf_rj[k] + *nextai[k]; 4804 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4805 bnzi += nlnk; 4806 nextrow[k]++; nextai[k]++; 4807 } 4808 } 4809 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4810 4811 /* if free space is not available, make more free space */ 4812 if (current_space->local_remaining<bnzi) { 4813 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4814 nspacedouble++; 4815 } 4816 /* copy data into free space, then initialize lnk */ 4817 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4818 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4819 4820 current_space->array += bnzi; 4821 current_space->local_used += bnzi; 4822 current_space->local_remaining -= bnzi; 4823 4824 bi[i+1] = bi[i] + bnzi; 4825 } 4826 4827 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4828 4829 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4830 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4831 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4832 4833 /* create symbolic parallel matrix B_mpi */ 4834 /*---------------------------------------*/ 4835 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4836 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4837 if (n==PETSC_DECIDE) { 4838 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4839 } else { 4840 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4841 } 4842 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4843 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4844 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4845 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4846 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4847 4848 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4849 B_mpi->assembled = PETSC_FALSE; 4850 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4851 merge->bi = bi; 4852 merge->bj = bj; 4853 merge->buf_ri = buf_ri; 4854 merge->buf_rj = buf_rj; 4855 merge->coi = NULL; 4856 merge->coj = NULL; 4857 merge->owners_co = NULL; 4858 4859 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4860 4861 /* attach the 
supporting struct to B_mpi for reuse */ 4862 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4863 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4864 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4865 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4866 *mpimat = B_mpi; 4867 4868 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4869 PetscFunctionReturn(0); 4870 } 4871 4872 /*@C 4873 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4874 matrices from each processor 4875 4876 Collective on MPI_Comm 4877 4878 Input Parameters: 4879 + comm - the communicators the parallel matrix will live on 4880 . seqmat - the input sequential matrices 4881 . m - number of local rows (or PETSC_DECIDE) 4882 . n - number of local columns (or PETSC_DECIDE) 4883 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4884 4885 Output Parameter: 4886 . mpimat - the parallel matrix generated 4887 4888 Level: advanced 4889 4890 Notes: 4891 The dimensions of the sequential matrix in each processor MUST be the same. 4892 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4893 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4894 @*/ 4895 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4896 { 4897 PetscErrorCode ierr; 4898 PetscMPIInt size; 4899 4900 PetscFunctionBegin; 4901 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4902 if (size == 1) { 4903 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4904 if (scall == MAT_INITIAL_MATRIX) { 4905 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4906 } else { 4907 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4908 } 4909 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4910 PetscFunctionReturn(0); 4911 } 4912 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4913 if (scall == MAT_INITIAL_MATRIX) { 4914 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4915 } 4916 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4917 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4918 PetscFunctionReturn(0); 4919 } 4920 4921 /*@ 4922 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4923 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4924 with MatGetSize() 4925 4926 Not Collective 4927 4928 Input Parameters: 4929 + A - the matrix 4930 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4931 4932 Output Parameter: 4933 . 
A_loc - the local sequential matrix generated 4934 4935 Level: developer 4936 4937 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4938 4939 @*/ 4940 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4941 { 4942 PetscErrorCode ierr; 4943 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4944 Mat_SeqAIJ *mat,*a,*b; 4945 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4946 MatScalar *aa,*ba,*cam; 4947 PetscScalar *ca; 4948 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4949 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4950 PetscBool match; 4951 MPI_Comm comm; 4952 PetscMPIInt size; 4953 4954 PetscFunctionBegin; 4955 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4956 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4957 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4958 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4959 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4960 4961 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4962 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4963 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4964 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4965 aa = a->a; ba = b->a; 4966 if (scall == MAT_INITIAL_MATRIX) { 4967 if (size == 1) { 4968 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4969 PetscFunctionReturn(0); 4970 } 4971 4972 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4973 ci[0] = 0; 4974 for (i=0; i<am; i++) { 4975 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4976 } 4977 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4978 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4979 k = 0; 4980 for (i=0; i<am; i++) { 4981 ncols_o = bi[i+1] - bi[i]; 4982 ncols_d = ai[i+1] - ai[i]; 4983 /* off-diagonal portion of A */ 4984 for (jo=0; jo<ncols_o; jo++) { 4985 col = cmap[*bj]; 4986 if (col >= cstart) break; 4987 cj[k] = col; bj++; 4988 ca[k++] = *ba++; 4989 } 4990 /* diagonal portion of A */ 4991 for (j=0; j<ncols_d; j++) { 4992 cj[k] = cstart + *aj++; 4993 ca[k++] = *aa++; 4994 } 4995 /* off-diagonal portion of A */ 4996 for (j=jo; j<ncols_o; j++) { 4997 cj[k] = cmap[*bj++]; 4998 ca[k++] = *ba++; 4999 } 5000 } 5001 /* put together the new matrix */ 5002 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5003 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5004 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5005 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5006 mat->free_a = PETSC_TRUE; 5007 mat->free_ij = PETSC_TRUE; 5008 mat->nonew = 0; 5009 } else if (scall == MAT_REUSE_MATRIX) { 5010 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5011 ci = mat->i; cj = mat->j; cam = mat->a; 5012 for (i=0; i<am; i++) { 5013 /* off-diagonal portion of A */ 5014 ncols_o = bi[i+1] - bi[i]; 5015 for (jo=0; jo<ncols_o; jo++) { 5016 col = cmap[*bj]; 5017 if (col >= cstart) break; 5018 *cam++ = *ba++; bj++; 5019 } 5020 /* diagonal portion of A */ 5021 ncols_d = ai[i+1] - ai[i]; 5022 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5023 /* off-diagonal portion of A */ 5024 for (j=jo; j<ncols_o; j++) { 5025 *cam++ = *ba++; bj++; 5026 } 5027 } 5028 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5029 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5030 PetscFunctionReturn(0); 5031 } 5032 5033 /*@C 5034 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5035 5036 Not Collective 5037 5038 Input Parameters: 5039 + A - the matrix 5040 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5041 - row, col - index sets of rows and columns to extract (or NULL) 5042 5043 Output Parameter: 5044 . A_loc - the local sequential matrix generated 5045 5046 Level: developer 5047 5048 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5049 5050 @*/ 5051 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5052 { 5053 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5054 PetscErrorCode ierr; 5055 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5056 IS isrowa,iscola; 5057 Mat *aloc; 5058 PetscBool match; 5059 5060 PetscFunctionBegin; 5061 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5062 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5063 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5064 if (!row) { 5065 start = A->rmap->rstart; end = A->rmap->rend; 5066 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5067 } else { 5068 isrowa = *row; 5069 } 5070 if (!col) { 5071 start = A->cmap->rstart; 5072 cmap = a->garray; 5073 nzA = a->A->cmap->n; 5074 nzB = a->B->cmap->n; 5075 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5076 ncols = 0; 5077 for (i=0; i<nzB; i++) { 5078 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5079 else break; 5080 } 5081 imark = i; 5082 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5083 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5084 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5085 } else { 5086 iscola = *col; 5087 } 5088 if (scall != MAT_INITIAL_MATRIX) { 5089 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5090 aloc[0] = *A_loc; 5091 } 5092 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5093 if (!col) { /* attach global id of condensed columns */ 5094 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5095 } 5096 *A_loc = aloc[0]; 5097 ierr = PetscFree(aloc);CHKERRQ(ierr); 5098 if (!row) { 5099 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5100 } 5101 if (!col) { 5102 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5103 } 5104 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5105 PetscFunctionReturn(0); 5106 } 5107 5108 /*@C 5109 MatGetBrowsOfAcols - Creates a 
SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5110 5111 Collective on Mat 5112 5113 Input Parameters: 5114 + A,B - the matrices in mpiaij format 5115 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5116 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5117 5118 Output Parameter: 5119 + rowb, colb - index sets of rows and columns of B to extract 5120 - B_seq - the sequential matrix generated 5121 5122 Level: developer 5123 5124 @*/ 5125 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5126 { 5127 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5128 PetscErrorCode ierr; 5129 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5130 IS isrowb,iscolb; 5131 Mat *bseq=NULL; 5132 5133 PetscFunctionBegin; 5134 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5135 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5136 } 5137 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5138 5139 if (scall == MAT_INITIAL_MATRIX) { 5140 start = A->cmap->rstart; 5141 cmap = a->garray; 5142 nzA = a->A->cmap->n; 5143 nzB = a->B->cmap->n; 5144 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5145 ncols = 0; 5146 for (i=0; i<nzB; i++) { /* row < local row index */ 5147 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5148 else break; 5149 } 5150 imark = i; 5151 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5152 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5153 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5154 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5155 } else { 5156 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5157 isrowb = *rowb; iscolb = *colb; 5158 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5159 bseq[0] = *B_seq; 5160 } 5161 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5162 *B_seq = bseq[0]; 5163 ierr = PetscFree(bseq);CHKERRQ(ierr); 5164 if (!rowb) { 5165 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5166 } else { 5167 *rowb = isrowb; 5168 } 5169 if (!colb) { 5170 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5171 } else { 5172 *colb = iscolb; 5173 } 5174 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5175 PetscFunctionReturn(0); 5176 } 5177 5178 /* 5179 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5180 of the OFF-DIAGONAL portion of local A 5181 5182 Collective on Mat 5183 5184 Input Parameters: 5185 + A,B - the matrices in mpiaij format 5186 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5187 5188 Output Parameter: 5189 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5190 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5191 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5192 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5193 5194 Level: developer 5195 5196 */ 5197 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5198 { 5199 VecScatter_MPI_General *gen_to,*gen_from; 5200 PetscErrorCode ierr; 5201 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5202 Mat_SeqAIJ *b_oth; 5203 VecScatter ctx; 5204 MPI_Comm comm; 5205 PetscMPIInt *rprocs,*sprocs,tag,rank; 5206 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5207 PetscInt *rvalues,*svalues,*cols,sbs,rbs; 5208 PetscScalar *b_otha,*bufa,*bufA,*vals; 5209 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5210 MPI_Request *rwaits = NULL,*swaits = NULL; 5211 MPI_Status *sstatus,rstatus; 5212 PetscMPIInt jj,size; 5213 VecScatterType type; 5214 PetscBool mpi1; 5215 5216 PetscFunctionBegin; 5217 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5218 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5219 5220 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5221 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5222 } 5223 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5224 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5225 5226 if (size == 1) { 5227 startsj_s = NULL; 5228 bufa_ptr = NULL; 5229 *B_oth = NULL; 5230 PetscFunctionReturn(0); 5231 } 5232 5233 ctx = a->Mvctx; 5234 ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr); 5235 ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr); 5236 if (!mpi1) { 5237 /* a->Mvctx is not type MPI1 which is not implemented for Mat-Mat ops, 5238 thus create a->Mvctx_mpi1 */ 5239 if (!a->Mvctx_mpi1) { 5240 a->Mvctx_mpi1_flg = PETSC_TRUE; 5241 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5242 } 5243 ctx = a->Mvctx_mpi1; 5244 } 5245 tag = ((PetscObject)ctx)->tag; 5246 5247 gen_to = (VecScatter_MPI_General*)ctx->todata; 5248 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5249 nrecvs = gen_from->n; 5250 nsends = gen_to->n; 5251 5252 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5253 srow = gen_to->indices; /* local row index to be sent */ 5254 sstarts = gen_to->starts; 5255 sprocs = gen_to->procs; 5256 sstatus = gen_to->sstatus; 5257 sbs = gen_to->bs; 5258 rstarts = gen_from->starts; 5259 rprocs = gen_from->procs; 5260 rbs = gen_from->bs; 5261 5262 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5263 if (scall == MAT_INITIAL_MATRIX) { 5264 /* i-array */ 5265 /*---------*/ 5266 /* post receives */ 5267 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5268 for (i=0; i<nrecvs; i++) { 5269 rowlen = rvalues + rstarts[i]*rbs; 5270 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5271 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5272 } 5273 5274 /* pack the outgoing message */ 5275 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5276 5277 sstartsj[0] = 0; 5278 rstartsj[0] = 0; 5279 len = 0; /* total length of j or a array to be sent */ 5280 k = 0; 5281 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5282 for (i=0; i<nsends; i++) { 5283 rowlen = svalues + sstarts[i]*sbs; 5284 
nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5285 for (j=0; j<nrows; j++) { 5286 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5287 for (l=0; l<sbs; l++) { 5288 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5289 5290 rowlen[j*sbs+l] = ncols; 5291 5292 len += ncols; 5293 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5294 } 5295 k++; 5296 } 5297 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5298 5299 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5300 } 5301 /* recvs and sends of i-array are completed */ 5302 i = nrecvs; 5303 while (i--) { 5304 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5305 } 5306 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5307 ierr = PetscFree(svalues);CHKERRQ(ierr); 5308 5309 /* allocate buffers for sending j and a arrays */ 5310 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5311 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5312 5313 /* create i-array of B_oth */ 5314 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5315 5316 b_othi[0] = 0; 5317 len = 0; /* total length of j or a array to be received */ 5318 k = 0; 5319 for (i=0; i<nrecvs; i++) { 5320 rowlen = rvalues + rstarts[i]*rbs; 5321 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5322 for (j=0; j<nrows; j++) { 5323 b_othi[k+1] = b_othi[k] + rowlen[j]; 5324 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5325 k++; 5326 } 5327 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5328 } 5329 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5330 5331 /* allocate space for j and a arrrays of B_oth */ 5332 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5333 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5334 5335 /* j-array */ 5336 /*---------*/ 5337 /* post receives of j-array */ 5338 for (i=0; i<nrecvs; i++) { 5339 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5340 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5341 } 5342 5343 /* pack the outgoing message j-array */ 5344 k = 0; 5345 for (i=0; i<nsends; i++) { 5346 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5347 bufJ = bufj+sstartsj[i]; 5348 for (j=0; j<nrows; j++) { 5349 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5350 for (ll=0; ll<sbs; ll++) { 5351 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5352 for (l=0; l<ncols; l++) { 5353 *bufJ++ = cols[l]; 5354 } 5355 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5356 } 5357 } 5358 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5359 } 5360 5361 /* recvs and sends of j-array are completed */ 5362 i = nrecvs; 5363 while (i--) { 5364 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5365 } 5366 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5367 } else if (scall == MAT_REUSE_MATRIX) { 5368 sstartsj = *startsj_s; 5369 rstartsj = *startsj_r; 5370 bufa = *bufa_ptr; 5371 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5372 b_otha = b_oth->a; 5373 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5374 5375 /* a-array */ 5376 /*---------*/ 5377 /* post receives of a-array */ 5378 for (i=0; i<nrecvs; i++) { 5379 nrows = rstartsj[i+1]-rstartsj[i]; /* length 
of the msg received */ 5380 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5381 } 5382 5383 /* pack the outgoing message a-array */ 5384 k = 0; 5385 for (i=0; i<nsends; i++) { 5386 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5387 bufA = bufa+sstartsj[i]; 5388 for (j=0; j<nrows; j++) { 5389 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5390 for (ll=0; ll<sbs; ll++) { 5391 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5392 for (l=0; l<ncols; l++) { 5393 *bufA++ = vals[l]; 5394 } 5395 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5396 } 5397 } 5398 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5399 } 5400 /* recvs and sends of a-array are completed */ 5401 i = nrecvs; 5402 while (i--) { 5403 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5404 } 5405 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5406 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5407 5408 if (scall == MAT_INITIAL_MATRIX) { 5409 /* put together the new matrix */ 5410 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5411 5412 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5413 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5414 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5415 b_oth->free_a = PETSC_TRUE; 5416 b_oth->free_ij = PETSC_TRUE; 5417 b_oth->nonew = 0; 5418 5419 ierr = PetscFree(bufj);CHKERRQ(ierr); 5420 if (!startsj_s || !bufa_ptr) { 5421 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5422 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5423 } else { 5424 *startsj_s = sstartsj; 5425 *startsj_r = rstartsj; 5426 *bufa_ptr = bufa; 5427 } 5428 } 5429 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5430 PetscFunctionReturn(0); 5431 } 5432 5433 /*@C 5434 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5435 5436 Not Collective 5437 5438 Input Parameters: 5439 . A - The matrix in mpiaij format 5440 5441 Output Parameter: 5442 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5443 . 
colmap - A map from global column index to local index into lvec 5444 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5445 5446 Level: developer 5447 5448 @*/ 5449 #if defined(PETSC_USE_CTABLE) 5450 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5451 #else 5452 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5453 #endif 5454 { 5455 Mat_MPIAIJ *a; 5456 5457 PetscFunctionBegin; 5458 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5459 PetscValidPointer(lvec, 2); 5460 PetscValidPointer(colmap, 3); 5461 PetscValidPointer(multScatter, 4); 5462 a = (Mat_MPIAIJ*) A->data; 5463 if (lvec) *lvec = a->lvec; 5464 if (colmap) *colmap = a->colmap; 5465 if (multScatter) *multScatter = a->Mvctx; 5466 PetscFunctionReturn(0); 5467 } 5468 5469 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5470 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5471 #if defined(PETSC_HAVE_MKL_SPARSE) 5472 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5473 #endif 5474 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5475 #if defined(PETSC_HAVE_ELEMENTAL) 5476 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5477 #endif 5478 #if defined(PETSC_HAVE_HYPRE) 5479 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5480 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5481 #endif 5482 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5483 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5484 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5485 5486 /* 5487 Computes (B'*A')' since computing B*A directly is untenable 5488 5489 n p p 5490 ( ) ( ) ( ) 5491 m ( A ) * n ( B ) = m ( C ) 5492 ( ) ( ) ( ) 5493 5494 */ 5495 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5496 { 5497 PetscErrorCode ierr; 5498 Mat At,Bt,Ct; 5499 5500 PetscFunctionBegin; 5501 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5502 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5503 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5504 ierr = MatDestroy(&At);CHKERRQ(ierr); 5505 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5506 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5507 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5508 PetscFunctionReturn(0); 5509 } 5510 5511 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5512 { 5513 PetscErrorCode ierr; 5514 PetscInt m=A->rmap->n,n=B->cmap->n; 5515 Mat Cmat; 5516 5517 PetscFunctionBegin; 5518 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5519 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5520 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5521 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5522 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5523 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5524 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5525 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5526 5527 Cmat->ops->matmultnumeric = 
MatMatMultNumeric_MPIDense_MPIAIJ; 5528 5529 *C = Cmat; 5530 PetscFunctionReturn(0); 5531 } 5532 5533 /* ----------------------------------------------------------------*/ 5534 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5535 { 5536 PetscErrorCode ierr; 5537 5538 PetscFunctionBegin; 5539 if (scall == MAT_INITIAL_MATRIX) { 5540 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5541 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5542 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5543 } 5544 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5545 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5546 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5547 PetscFunctionReturn(0); 5548 } 5549 5550 /*MC 5551 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5552 5553 Options Database Keys: 5554 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5555 5556 Level: beginner 5557 5558 .seealso: MatCreateAIJ() 5559 M*/ 5560 5561 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5562 { 5563 Mat_MPIAIJ *b; 5564 PetscErrorCode ierr; 5565 PetscMPIInt size; 5566 5567 PetscFunctionBegin; 5568 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5569 5570 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5571 B->data = (void*)b; 5572 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5573 B->assembled = PETSC_FALSE; 5574 B->insertmode = NOT_SET_VALUES; 5575 b->size = size; 5576 5577 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5578 5579 /* build cache for off array entries formed */ 5580 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5581 5582 b->donotstash = PETSC_FALSE; 5583 b->colmap = 0; 5584 b->garray = 0; 5585 b->roworiented = PETSC_TRUE; 5586 5587 /* stuff used for matrix vector multiply */ 5588 b->lvec = NULL; 5589 b->Mvctx = NULL; 5590 5591 /* stuff for MatGetRow() */ 5592 b->rowindices = 0; 5593 b->rowvalues = 0; 5594 b->getrowactive = PETSC_FALSE; 5595 5596 /* flexible pointer used in CUSP/CUSPARSE classes */ 5597 b->spptr = NULL; 5598 5599 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5608 #if defined(PETSC_HAVE_MKL_SPARSE) 5609 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5610 #endif 5611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5613 #if defined(PETSC_HAVE_ELEMENTAL) 5614 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5615 #endif 5616 #if defined(PETSC_HAVE_HYPRE) 5617 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5618 #endif 5619 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5620 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5621 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5622 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5623 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5624 #if defined(PETSC_HAVE_HYPRE) 5625 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5626 #endif 5627 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5628 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5629 PetscFunctionReturn(0); 5630 } 5631 5632 /*@C 5633 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5634 and "off-diagonal" part of the matrix in CSR format. 5635 5636 Collective on MPI_Comm 5637 5638 Input Parameters: 5639 + comm - MPI communicator 5640 . m - number of local rows (Cannot be PETSC_DECIDE) 5641 . n - This value should be the same as the local size used in creating the 5642 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5643 calculated if N is given) For square matrices n is almost always m. 5644 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5645 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5646 . i - row indices for "diagonal" portion of matrix 5647 . j - column indices 5648 . a - matrix values 5649 . oi - row indices for "off-diagonal" portion of matrix 5650 . oj - column indices 5651 - oa - matrix values 5652 5653 Output Parameter: 5654 . mat - the matrix 5655 5656 Level: advanced 5657 5658 Notes: 5659 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5660 must free the arrays once the matrix has been destroyed and not before. 5661 5662 The i and j indices are 0 based 5663 5664 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5665 5666 This sets local rows and cannot be used to set off-processor values. 5667 5668 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5669 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5670 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 5671 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5672 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5673 communication if it is known that only local entries will be set. 5674 5675 .keywords: matrix, aij, compressed row, sparse, parallel 5676 5677 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5678 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5679 @*/ 5680 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5681 { 5682 PetscErrorCode ierr; 5683 Mat_MPIAIJ *maij; 5684 5685 PetscFunctionBegin; 5686 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5687 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5688 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5689 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5690 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5691 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5692 maij = (Mat_MPIAIJ*) (*mat)->data; 5693 5694 (*mat)->preallocated = PETSC_TRUE; 5695 5696 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5697 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5698 5699 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5700 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5701 5702 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5703 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5704 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5705 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5706 5707 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5708 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5709 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5710 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5711 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5712 PetscFunctionReturn(0); 5713 } 5714 5715 /* 5716 Special version for direct calls from Fortran 5717 */ 5718 #include <petsc/private/fortranimpl.h> 5719 5720 /* Change these macros so can be used in void function */ 5721 #undef CHKERRQ 5722 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5723 #undef SETERRQ2 5724 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5725 #undef SETERRQ3 5726 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5727 #undef SETERRQ 5728 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5729 5730 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5731 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5732 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5733 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5734 #else 5735 #endif 5736 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5737 { 5738 Mat mat = *mmat; 5739 PetscInt m = *mm, n = *mn; 5740 InsertMode addv = 

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in this void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a;   /* refresh ba before computing ap2; the old array was freed by MatDisAssemble_MPIAIJ() */
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
