1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc/private/vecimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 8 /*MC 9 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 10 11 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 12 and MATMPIAIJ otherwise. As a result, for single process communicators, 13 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 14 for communicators controlling multiple processes. It is recommended that you call both of 15 the above preallocation routines for simplicity. 16 17 Options Database Keys: 18 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 19 20 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 21 enough exist. 22 23 Level: beginner 24 25 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 26 M*/ 27 28 /*MC 29 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 30 31 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 32 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 33 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 34 for communicators controlling multiple processes. It is recommended that you call both of 35 the above preallocation routines for simplicity. 36 37 Options Database Keys: 38 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 39 40 Level: beginner 41 42 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 43 M*/ 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 47 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 48 { 49 PetscErrorCode ierr; 50 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 51 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 52 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 53 const PetscInt *ia,*ib; 54 const MatScalar *aa,*bb; 55 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 56 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 57 58 PetscFunctionBegin; 59 *keptrows = 0; 60 ia = a->i; 61 ib = b->i; 62 for (i=0; i<m; i++) { 63 na = ia[i+1] - ia[i]; 64 nb = ib[i+1] - ib[i]; 65 if (!na && !nb) { 66 cnt++; 67 goto ok1; 68 } 69 aa = a->a + ia[i]; 70 for (j=0; j<na; j++) { 71 if (aa[j] != 0.0) goto ok1; 72 } 73 bb = b->a + ib[i]; 74 for (j=0; j <nb; j++) { 75 if (bb[j] != 0.0) goto ok1; 76 } 77 cnt++; 78 ok1:; 79 } 80 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 81 if (!n0rows) PetscFunctionReturn(0); 82 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 83 cnt = 0; 84 for (i=0; i<m; i++) { 85 na = ia[i+1] - ia[i]; 86 nb = ib[i+1] - ib[i]; 87 if (!na && !nb) continue; 88 aa = a->a + ia[i]; 89 for (j=0; j<na;j++) { 90 if (aa[j] != 0.0) { 91 rows[cnt++] = rstart + i; 92 goto ok2; 93 } 94 } 95 bb = b->a + ib[i]; 96 for (j=0; j<nb; j++) { 97 if (bb[j] != 0.0) { 98 rows[cnt++] = rstart + i; 99 goto ok2; 100 } 101 } 102 ok2:; 103 } 104 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNCT__ 109 #define __FUNCT__ "MatDiagonalSet_MPIAIJ" 110 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 111 { 112 PetscErrorCode ierr; 113 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 114 115 
PetscFunctionBegin; 116 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 117 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 118 } else { 119 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 120 } 121 PetscFunctionReturn(0); 122 } 123 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 128 { 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 130 PetscErrorCode ierr; 131 PetscInt i,rstart,nrows,*rows; 132 133 PetscFunctionBegin; 134 *zrows = NULL; 135 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 136 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 137 for (i=0; i<nrows; i++) rows[i] += rstart; 138 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 145 { 146 PetscErrorCode ierr; 147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 148 PetscInt i,n,*garray = aij->garray; 149 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 150 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 151 PetscReal *work; 152 153 PetscFunctionBegin; 154 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 155 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 156 if (type == NORM_2) { 157 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 158 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 159 } 160 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 161 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 162 } 163 } else if (type == NORM_1) { 164 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 165 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 166 } 167 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 168 work[garray[b_aij->j[i]]] += 
PetscAbsScalar(b_aij->a[i]); 169 } 170 } else if (type == NORM_INFINITY) { 171 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 172 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 173 } 174 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 175 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 176 } 177 178 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 179 if (type == NORM_INFINITY) { 180 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 181 } else { 182 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 183 } 184 ierr = PetscFree(work);CHKERRQ(ierr); 185 if (type == NORM_2) { 186 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 187 } 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ" 193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 194 { 195 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 196 IS sis,gis; 197 PetscErrorCode ierr; 198 const PetscInt *isis,*igis; 199 PetscInt n,*iis,nsis,ngis,rstart,i; 200 201 PetscFunctionBegin; 202 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 203 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 204 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 205 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 206 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 207 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 208 209 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 210 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 211 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 212 n = ngis + nsis; 213 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 214 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 215 for (i=0; i<n; i++) iis[i] += rstart; 216 ierr = 
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN

    Input Parameters:
+   comm  - communicator over which the matrix is distributed; rank 0 is the sender
.   gmat  - the matrix to distribute; on rank 0 it must be of type MATSEQAIJ (checked below)
.   m     - number of rows (and columns) this process owns in the distributed matrix
-   reuse - MAT_INITIAL_MATRIX creates the distributed matrix and moves structure and values;
            any other value only moves the numerical values into an existing *inmat

    Output Parameter:
.   inmat - the distributed matrix; written when reuse is MAT_INITIAL_MATRIX, otherwise it
            must already hold the matrix produced by an earlier MAT_INITIAL_MATRIX call

    Notes:
    The initial call stores in Mat_MPIAIJ->ld, for every local row, the number of entries
    whose global column is smaller than the first locally owned row/column. The reuse path
    relies on these counts to split each incoming row between the off-diagonal and diagonal
    blocks without looking at column indices again.

    The messages between rank 0 and the other ranks are blocking and share one tag, so the
    order of the sends on rank 0 must match the order of the receives on the other ranks.

    NOTE(review): MatGetBlockSizes() is called on gmat by every rank although only the values
    of rank 0 survive the broadcast - confirm that gmat is a valid Mat on all ranks.
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* only rank 0 verifies the type of the input matrix */
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    /* every rank uses the block sizes held by rank 0 */
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    /* gather every rank's local size into rowners[1..size] ... */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    /* ... and turn the sizes into ownership offsets with a running sum */
    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          /* ld[i]: entries of row i left of the diagonal block; olens[i]: all entries outside of it */
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* rank 0 inserts its own rows straight from the arrays of gmat */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          /* same classification as on rank 0, applied to the received column indices */
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    /* dlens temporarily holds only the diagonal-block counts ... */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* ... and is restored to full row lengths for the insertion loop */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    /* insert the local rows one at a time; cnt is the offset of row i in gmataj/gmataa */
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    /* only the receiving ranks allocated gmataa/gmataj */
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    /* keep the left-of-diagonal counts with the matrix for later reuse calls */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      /* gmataa is advanced during the copy below, so remember the allocation for PetscFree() */
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    /*
       Within a row the incoming values are ordered: left off-diagonal part, diagonal part,
       right off-diagonal part. The right part of row i-1 and the left part of row i are
       adjacent in the off-diagonal storage, so they are moved with a single copy.
    */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      /* row 0: left off-diagonal part, then diagonal part */
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right off-diagonal part of row i-1 plus left off-diagonal part of row i, then diagonal part of row i */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    /* step back to the last local row (the result is unused when there are no local rows) */
    i--;
    if (mat->rmap->n) {
      /* right off-diagonal part of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
403 */ 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 407 { 408 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 409 PetscErrorCode ierr; 410 PetscInt n = aij->B->cmap->n,i; 411 412 PetscFunctionBegin; 413 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 414 #if defined(PETSC_USE_CTABLE) 415 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 416 for (i=0; i<n; i++) { 417 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 418 } 419 #else 420 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 421 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 422 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 423 #endif 424 PetscFunctionReturn(0); 425 } 426 427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 428 { \ 429 if (col <= lastcol1) low1 = 0; \ 430 else high1 = nrow1; \ 431 lastcol1 = col;\ 432 while (high1-low1 > 5) { \ 433 t = (low1+high1)/2; \ 434 if (rp1[t] > col) high1 = t; \ 435 else low1 = t; \ 436 } \ 437 for (_i=low1; _i<high1; _i++) { \ 438 if (rp1[_i] > col) break; \ 439 if (rp1[_i] == col) { \ 440 if (addv == ADD_VALUES) ap1[_i] += value; \ 441 else ap1[_i] = value; \ 442 goto a_noinsert; \ 443 } \ 444 } \ 445 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 446 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 447 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 448 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 449 N = nrow1++ - 1; a->nz++; high1++; \ 450 /* shift up all the later entries in this row */ \ 451 for (ii=N; ii>=_i; ii--) { \ 452 rp1[ii+1] = rp1[ii]; \ 453 ap1[ii+1] = ap1[ii]; 
/*
   MatSetValues_SeqAIJ_B_Private - Puts one value into row `row`, column `col` of the
   off-diagonal block B. This is a statement macro, not a function: it reads and updates
   variables of the enclosing function by name.

   Arguments:
     row, col   - local row and the column index used inside B
     value      - the value to store
     addv       - ADD_VALUES accumulates into an existing entry, anything else overwrites it
     orow, ocol - row and column numbers that are only used in the error message

   Names that must exist in the enclosing scope:
     B, b, bm, ba, bi, bj, bimax, bilen      - the block, its data and its arrays
     rp2, ap2, nrow2, rmax2                  - column indices, values, current length and
                                               allocated length of the current row
     low2, high2, lastcol2                   - search window kept between successive calls
     nonew, ignorezeroentries                - insertion policy flags
     t, _i, ii, N                            - scratch integers

   Steps:
     1. Reuse the previous search window: if col is not larger than the previously handled
        column the lower bound is reset, otherwise the upper bound is reset.
     2. Halve the window until it spans at most 5 entries, then scan it linearly. A match is
        updated in place and the macro jumps to b_noinsert.
     3. Without a match, _i is the insertion position. Nothing is inserted when value is 0.0
        and ignorezeroentries is set, or when nonew is 1; nonew equal to -1 raises
        PETSC_ERR_ARG_OUTOFRANGE.
     4. Otherwise MatSeqXAIJReallocateAIJ() is invoked (presumably it grows the storage when
        the row is full - confirm against its definition), the entries from _i on are shifted
        up by one, the new entry is written, and b->nz and B->nonzerostate are incremented.
     5. bilen[row] is always refreshed from nrow2.

   The label b_noinsert is defined inside the macro, so the macro can be expanded only once
   per function.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) ap2[_i] += value;         \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   MatSetValues_MPIAIJ - Inserts or adds an m by n block of values into an MPIAIJ matrix.

   Input Parameters:
+  mat  - the matrix
.  m    - number of rows in the block
.  im   - global row indices; negative rows are skipped
.  n    - number of columns in the block
.  in   - global column indices; negative columns are skipped
.  v    - the values; entry (i,j) is read from v[i*n+j] when the matrix is row oriented and
          from v[i+j*m] otherwise
-  addv - ADD_VALUES to accumulate, anything else overwrites

   Notes:
   Locally owned rows are written directly: columns inside the local column range go to the
   diagonal block A, all other columns go to the off-diagonal block B.

   Rows owned by another process are placed in the matrix stash, unless stashing is disabled
   (donotstash), in which case they are dropped without a message. If nooffprocentries is
   set, an off-process row raises PETSC_ERR_ARG_WRONG.

   Range checks of row and column indices against the global sizes exist only when
   PETSC_USE_DEBUG is defined.

   The locals declared under "Some Variables required in the macro" are referenced by name
   inside MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private(); they must not
   be renamed or removed.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row state used by both insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* adding an ignorable zero changes nothing, so skip it */
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column belongs to the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* column belongs to the off-diagonal block */
          if (mat->was_assembled) {
            /* after an assembly the global column must be translated through colmap */
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* the column is not in B yet and new nonzeros are allowed: disassemble and use the global column number */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j]; /* matrix has not been assembled before: the global column number is used directly */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      /* row is owned by another process */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        /* the last argument is a flag derived from ignorezeroentries and ADD_VALUES (presumably tells the stash to skip zeros - confirm) */
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
#endif 665 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 666 else { 667 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 668 } 669 } 670 } 671 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 672 } 673 PetscFunctionReturn(0); 674 } 675 676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 681 { 682 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 683 PetscErrorCode ierr; 684 PetscInt nstash,reallocs; 685 686 PetscFunctionBegin; 687 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 688 689 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 690 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 691 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 698 { 699 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 700 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 701 PetscErrorCode ierr; 702 PetscMPIInt n; 703 PetscInt i,j,rstart,ncols,flg; 704 PetscInt *row,*col; 705 PetscBool other_disassembled; 706 PetscScalar *val; 707 708 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 709 710 PetscFunctionBegin; 711 if (!aij->donotstash && !mat->nooffprocentries) { 712 while (1) { 713 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 714 if (!flg) break; 715 716 for (i=0; i<n; ) { 717 /* Now identify the consecutive vals belonging to the same row */ 718 for (j=i,rstart=row[j]; j<n; j++) { 719 if (row[j] != rstart) break; 720 } 721 if (j < n) ncols = j-i; 722 else ncols = n-i; 723 /* Now assemble all 
these values with a single function call */ 724 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 725 726 i = j; 727 } 728 } 729 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 730 } 731 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 733 734 /* determine if any processor has disassembled, if so we must 735 also disassemble ourselfs, in order that we may reassemble. */ 736 /* 737 if nonzero structure of submatrix B cannot change then we know that 738 no processor disassembled thus we can skip this stuff 739 */ 740 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 741 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 742 if (mat->was_assembled && !other_disassembled) { 743 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 744 } 745 } 746 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 747 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 748 } 749 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 750 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 752 753 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 754 755 aij->rowvalues = 0; 756 757 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 758 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 759 760 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 761 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 762 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 763 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 764 } 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ 
"MatZeroEntries_MPIAIJ" 770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 771 { 772 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 773 PetscErrorCode ierr; 774 775 PetscFunctionBegin; 776 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 777 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 778 PetscFunctionReturn(0); 779 } 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatZeroRows_MPIAIJ" 783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 784 { 785 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 786 PetscInt *owners = A->rmap->range; 787 PetscInt n = A->rmap->n; 788 PetscSF sf; 789 PetscInt *lrows; 790 PetscSFNode *rrows; 791 PetscInt r, p = 0, len = 0; 792 PetscErrorCode ierr; 793 794 PetscFunctionBegin; 795 /* Create SF where leaves are input rows and roots are owned rows */ 796 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 797 for (r = 0; r < n; ++r) lrows[r] = -1; 798 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 799 for (r = 0; r < N; ++r) { 800 const PetscInt idx = rows[r]; 801 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 802 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 803 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 804 } 805 if (A->nooffproczerorows) { 806 if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank); 807 lrows[len++] = idx - owners[p]; 808 } else { 809 rrows[r].rank = p; 810 rrows[r].index = rows[r] - owners[p]; 811 } 812 } 813 if (!A->nooffproczerorows) { 814 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 815 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 816 /* Collect flags for rows to be zeroed */ 817 ierr = PetscSFReduceBegin(sf, MPIU_INT, 
(PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 818 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 819 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 820 /* Compress and put in row numbers */ 821 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 822 } 823 /* fix right hand side if needed */ 824 if (x && b) { 825 const PetscScalar *xx; 826 PetscScalar *bb; 827 828 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 829 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 830 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 831 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 832 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 833 } 834 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 835 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 836 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 837 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 838 } else if (diag != 0.0) { 839 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 840 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 841 for (r = 0; r < len; ++r) { 842 const PetscInt row = lrows[r] + A->rmap->rstart; 843 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 844 } 845 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 846 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 847 } else { 848 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 849 } 850 ierr = PetscFree(lrows);CHKERRQ(ierr); 851 852 /* only change matrix nonzero state if pattern was allowed to be changed */ 853 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 854 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 
855 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 856 } 857 PetscFunctionReturn(0); 858 } 859 860 #undef __FUNCT__ 861 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 862 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 863 { 864 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 865 PetscErrorCode ierr; 866 PetscMPIInt n = A->rmap->n; 867 PetscInt i,j,r,m,p = 0,len = 0; 868 PetscInt *lrows,*owners = A->rmap->range; 869 PetscSFNode *rrows; 870 PetscSF sf; 871 const PetscScalar *xx; 872 PetscScalar *bb,*mask; 873 Vec xmask,lmask; 874 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 875 const PetscInt *aj, *ii,*ridx; 876 PetscScalar *aa; 877 878 PetscFunctionBegin; 879 /* Create SF where leaves are input rows and roots are owned rows */ 880 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 881 for (r = 0; r < n; ++r) lrows[r] = -1; 882 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 883 for (r = 0; r < N; ++r) { 884 const PetscInt idx = rows[r]; 885 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 886 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 887 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 888 } 889 rrows[r].rank = p; 890 rrows[r].index = rows[r] - owners[p]; 891 } 892 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 893 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 894 /* Collect flags for rows to be zeroed */ 895 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 896 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 897 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 898 /* Compress and put in row numbers */ 899 for (r = 0; r < n; ++r) if (lrows[r] >= 0) 
lrows[len++] = r; 900 /* zero diagonal part of matrix */ 901 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 902 /* handle off diagonal part of matrix */ 903 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 904 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 905 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 906 for (i=0; i<len; i++) bb[lrows[i]] = 1; 907 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 908 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 910 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 911 if (x) { 912 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 913 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 914 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 915 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 916 } 917 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 918 /* remove zeroed rows of off diagonal matrix */ 919 ii = aij->i; 920 for (i=0; i<len; i++) { 921 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 922 } 923 /* loop over all elements of off process part of matrix zeroing removed columns*/ 924 if (aij->compressedrow.use) { 925 m = aij->compressedrow.nrows; 926 ii = aij->compressedrow.i; 927 ridx = aij->compressedrow.rindex; 928 for (i=0; i<m; i++) { 929 n = ii[i+1] - ii[i]; 930 aj = aij->j + ii[i]; 931 aa = aij->a + ii[i]; 932 933 for (j=0; j<n; j++) { 934 if (PetscAbsScalar(mask[*aj])) { 935 if (b) bb[*ridx] -= *aa*xx[*aj]; 936 *aa = 0.0; 937 } 938 aa++; 939 aj++; 940 } 941 ridx++; 942 } 943 } else { /* do not use compressed row format */ 944 m = l->B->rmap->n; 945 for (i=0; i<m; i++) { 946 n = ii[i+1] - ii[i]; 947 aj = aij->j + ii[i]; 948 aa = aij->a + ii[i]; 949 for (j=0; j<n; j++) { 950 if (PetscAbsScalar(mask[*aj])) { 951 if (b) bb[i] -= *aa*xx[*aj]; 952 *aa = 
0.0; 953 } 954 aa++; 955 aj++; 956 } 957 } 958 } 959 if (x) { 960 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 961 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 962 } 963 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 964 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 965 ierr = PetscFree(lrows);CHKERRQ(ierr); 966 967 /* only change matrix nonzero state if pattern was allowed to be changed */ 968 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 969 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 970 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 971 } 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatMult_MPIAIJ" 977 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 978 { 979 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 980 PetscErrorCode ierr; 981 PetscInt nt; 982 983 PetscFunctionBegin; 984 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 985 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 986 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 987 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 988 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 989 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 995 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 996 { 997 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 998 PetscErrorCode ierr; 999 1000 PetscFunctionBegin; 1001 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1007 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1008 { 1009 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1010 
PetscErrorCode ierr; 1011 1012 PetscFunctionBegin; 1013 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1014 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1015 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1016 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1017 PetscFunctionReturn(0); 1018 } 1019 1020 #undef __FUNCT__ 1021 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1022 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1023 { 1024 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1025 PetscErrorCode ierr; 1026 PetscBool merged; 1027 1028 PetscFunctionBegin; 1029 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1030 /* do nondiagonal part */ 1031 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1032 if (!merged) { 1033 /* send it on its way */ 1034 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1035 /* do local part */ 1036 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1037 /* receive remote parts: note this assumes the values are not actually */ 1038 /* added in yy until the next line, */ 1039 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1040 } else { 1041 /* do local part */ 1042 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1043 /* send it on its way */ 1044 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1045 /* values actually were received in the Begin() but we need to call this nop */ 1046 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1047 } 1048 PetscFunctionReturn(0); 1049 } 1050 1051 #undef __FUNCT__ 1052 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1053 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1054 { 1055 MPI_Comm comm; 1056 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1057 Mat Adia = 
Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1058 IS Me,Notme; 1059 PetscErrorCode ierr; 1060 PetscInt M,N,first,last,*notme,i; 1061 PetscMPIInt size; 1062 1063 PetscFunctionBegin; 1064 /* Easy test: symmetric diagonal block */ 1065 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1066 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1067 if (!*f) PetscFunctionReturn(0); 1068 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1069 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1070 if (size == 1) PetscFunctionReturn(0); 1071 1072 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1073 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1074 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1075 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1076 for (i=0; i<first; i++) notme[i] = i; 1077 for (i=last; i<M; i++) notme[i-last+first] = i; 1078 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1079 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1080 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1081 Aoff = Aoffs[0]; 1082 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1083 Boff = Boffs[0]; 1084 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1085 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1086 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1087 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1088 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1089 ierr = PetscFree(notme);CHKERRQ(ierr); 1090 PetscFunctionReturn(0); 1091 } 1092 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1095 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1096 { 1097 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1098 PetscErrorCode ierr; 1099 1100 PetscFunctionBegin; 1101 /* do nondiagonal part */ 1102 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1103 
/* send it on its way */ 1104 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1105 /* do local part */ 1106 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1107 /* receive remote parts */ 1108 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1109 PetscFunctionReturn(0); 1110 } 1111 1112 /* 1113 This only works correctly for square matrices where the subblock A->A is the 1114 diagonal block 1115 */ 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1118 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1119 { 1120 PetscErrorCode ierr; 1121 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1122 1123 PetscFunctionBegin; 1124 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1125 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1126 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatScale_MPIAIJ" 1132 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1139 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "MatDestroy_MPIAIJ" 1145 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1146 { 1147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 #if defined(PETSC_USE_LOG) 1152 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1153 #endif 1154 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1155 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1156 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 
1157 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1158 #if defined(PETSC_USE_CTABLE) 1159 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1160 #else 1161 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1162 #endif 1163 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1164 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1165 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1166 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1167 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1168 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1169 1170 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1172 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1174 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1175 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1176 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1177 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1178 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1179 #if defined(PETSC_HAVE_ELEMENTAL) 1180 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1181 #endif 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1187 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1188 { 1189 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1190 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1191 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1192 PetscErrorCode ierr; 1193 PetscMPIInt rank,size,tag = 
((PetscObject)viewer)->tag; 1194 int fd; 1195 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1196 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1197 PetscScalar *column_values; 1198 PetscInt message_count,flowcontrolcount; 1199 FILE *file; 1200 1201 PetscFunctionBegin; 1202 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1203 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1204 nz = A->nz + B->nz; 1205 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1206 if (!rank) { 1207 header[0] = MAT_FILE_CLASSID; 1208 header[1] = mat->rmap->N; 1209 header[2] = mat->cmap->N; 1210 1211 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1212 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1213 /* get largest number of rows any processor has */ 1214 rlen = mat->rmap->n; 1215 range = mat->rmap->range; 1216 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1217 } else { 1218 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1219 rlen = mat->rmap->n; 1220 } 1221 1222 /* load up the local row counts */ 1223 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1224 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1225 1226 /* store the row lengths to the file */ 1227 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1228 if (!rank) { 1229 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1230 for (i=1; i<size; i++) { 1231 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1232 rlen = range[i+1] - range[i]; 1233 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1234 ierr = 
PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1235 } 1236 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1237 } else { 1238 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1239 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1240 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1241 } 1242 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1243 1244 /* load up the local column indices */ 1245 nzmax = nz; /* th processor needs space a largest processor needs */ 1246 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1247 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1248 cnt = 0; 1249 for (i=0; i<mat->rmap->n; i++) { 1250 for (j=B->i[i]; j<B->i[i+1]; j++) { 1251 if ((col = garray[B->j[j]]) > cstart) break; 1252 column_indices[cnt++] = col; 1253 } 1254 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1255 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1256 } 1257 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1258 1259 /* store the column indices to the file */ 1260 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1261 if (!rank) { 1262 MPI_Status status; 1263 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1264 for (i=1; i<size; i++) { 1265 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1266 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1267 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1268 ierr = 
MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1269 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1270 } 1271 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1272 } else { 1273 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1274 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1275 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1276 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1277 } 1278 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1279 1280 /* load up the local column values */ 1281 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1282 cnt = 0; 1283 for (i=0; i<mat->rmap->n; i++) { 1284 for (j=B->i[i]; j<B->i[i+1]; j++) { 1285 if (garray[B->j[j]] > cstart) break; 1286 column_values[cnt++] = B->a[j]; 1287 } 1288 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1289 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1290 } 1291 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1292 1293 /* store the column values to the file */ 1294 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1295 if (!rank) { 1296 MPI_Status status; 1297 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1298 for (i=1; i<size; i++) { 1299 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1300 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1301 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1302 ierr = 
MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1303 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1304 } 1305 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1306 } else { 1307 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1308 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1309 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1310 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1311 } 1312 ierr = PetscFree(column_values);CHKERRQ(ierr); 1313 1314 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1315 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 #include <petscdraw.h> 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1322 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1323 { 1324 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1325 PetscErrorCode ierr; 1326 PetscMPIInt rank = aij->rank,size = aij->size; 1327 PetscBool isdraw,iascii,isbinary; 1328 PetscViewer sviewer; 1329 PetscViewerFormat format; 1330 1331 PetscFunctionBegin; 1332 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1333 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1334 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1335 if (iascii) { 1336 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1337 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1338 MatInfo info; 1339 PetscBool inodes; 1340 1341 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1342 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1343 
ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1344 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1345 if (!inodes) { 1346 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1347 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1348 } else { 1349 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1350 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1351 } 1352 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1353 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1354 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1355 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1356 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1357 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1358 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1359 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1362 PetscInt inodecount,inodelimit,*inodes; 1363 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1364 if (inodes) { 1365 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1366 } else { 1367 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1368 } 1369 PetscFunctionReturn(0); 1370 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1371 PetscFunctionReturn(0); 
1372 } 1373 } else if (isbinary) { 1374 if (size == 1) { 1375 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1376 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1377 } else { 1378 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1379 } 1380 PetscFunctionReturn(0); 1381 } else if (isdraw) { 1382 PetscDraw draw; 1383 PetscBool isnull; 1384 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1385 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1386 if (isnull) PetscFunctionReturn(0); 1387 } 1388 1389 { 1390 /* assemble the entire matrix onto first processor. */ 1391 Mat A; 1392 Mat_SeqAIJ *Aloc; 1393 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1394 MatScalar *a; 1395 1396 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1397 if (!rank) { 1398 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1399 } else { 1400 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1401 } 1402 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1403 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1404 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1405 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1406 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1407 1408 /* copy over the A part */ 1409 Aloc = (Mat_SeqAIJ*)aij->A->data; 1410 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1411 row = mat->rmap->rstart; 1412 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1413 for (i=0; i<m; i++) { 1414 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1415 row++; 1416 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1417 } 1418 aj = Aloc->j; 1419 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1420 1421 /* copy over the B part */ 1422 Aloc = (Mat_SeqAIJ*)aij->B->data; 1423 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1424 row = 
mat->rmap->rstart; 1425 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1426 ct = cols; 1427 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1428 for (i=0; i<m; i++) { 1429 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1430 row++; 1431 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1432 } 1433 ierr = PetscFree(ct);CHKERRQ(ierr); 1434 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1435 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1436 /* 1437 Everyone has to call to draw the matrix since the graphics waits are 1438 synchronized across all processors that share the PetscDraw object 1439 */ 1440 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1441 if (!rank) { 1442 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1443 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1444 } 1445 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1446 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1447 ierr = MatDestroy(&A);CHKERRQ(ierr); 1448 } 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatView_MPIAIJ" 1454 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1455 { 1456 PetscErrorCode ierr; 1457 PetscBool iascii,isdraw,issocket,isbinary; 1458 1459 PetscFunctionBegin; 1460 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1461 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1462 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1463 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1464 if (iascii || isdraw || isbinary || issocket) { 1465 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1466 } 1467 PetscFunctionReturn(0); 1468 } 1469 1470 
#undef __FUNCT__ 1471 #define __FUNCT__ "MatSOR_MPIAIJ" 1472 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1473 { 1474 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1475 PetscErrorCode ierr; 1476 Vec bb1 = 0; 1477 PetscBool hasop; 1478 1479 PetscFunctionBegin; 1480 if (flag == SOR_APPLY_UPPER) { 1481 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1486 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1487 } 1488 1489 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1490 if (flag & SOR_ZERO_INITIAL_GUESS) { 1491 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1492 its--; 1493 } 1494 1495 while (its--) { 1496 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1497 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1498 1499 /* update rhs: bb1 = bb - B*x */ 1500 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1501 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1502 1503 /* local sweep */ 1504 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1505 } 1506 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1507 if (flag & SOR_ZERO_INITIAL_GUESS) { 1508 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1509 its--; 1510 } 1511 while (its--) { 1512 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1513 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1514 1515 /* update rhs: bb1 = bb - B*x */ 1516 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1517 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1518 1519 /* local 
sweep */ 1520 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1521 } 1522 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1523 if (flag & SOR_ZERO_INITIAL_GUESS) { 1524 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1525 its--; 1526 } 1527 while (its--) { 1528 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1529 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1530 1531 /* update rhs: bb1 = bb - B*x */ 1532 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1533 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1534 1535 /* local sweep */ 1536 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1537 } 1538 } else if (flag & SOR_EISENSTAT) { 1539 Vec xx1; 1540 1541 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1542 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1543 1544 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1545 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1546 if (!mat->diag) { 1547 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1548 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1549 } 1550 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1551 if (hasop) { 1552 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1553 } else { 1554 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1555 } 1556 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1557 1558 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1559 1560 /* local sweep */ 1561 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1562 
ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1563 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1564 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1565 1566 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1567 1568 matin->errortype = mat->A->errortype; 1569 PetscFunctionReturn(0); 1570 } 1571 1572 #undef __FUNCT__ 1573 #define __FUNCT__ "MatPermute_MPIAIJ" 1574 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1575 { 1576 Mat aA,aB,Aperm; 1577 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1578 PetscScalar *aa,*ba; 1579 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1580 PetscSF rowsf,sf; 1581 IS parcolp = NULL; 1582 PetscBool done; 1583 PetscErrorCode ierr; 1584 1585 PetscFunctionBegin; 1586 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1587 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1588 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1589 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1590 1591 /* Invert row permutation to find out where my rows should go */ 1592 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1593 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1594 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1595 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1596 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1597 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1598 1599 /* Invert column permutation to find out where my columns should go */ 1600 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1601 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1602 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1603 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1604 ierr = 
PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1605 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1606 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1607 1608 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1609 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1610 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1611 1612 /* Find out where my gcols should go */ 1613 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1614 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1615 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1616 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1617 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1618 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1619 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1620 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1621 1622 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1623 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1624 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1625 for (i=0; i<m; i++) { 1626 PetscInt row = rdest[i],rowner; 1627 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1628 for (j=ai[i]; j<ai[i+1]; j++) { 1629 PetscInt cowner,col = cdest[aj[j]]; 1630 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1631 if (rowner == cowner) dnnz[i]++; 1632 else onnz[i]++; 1633 } 1634 for (j=bi[i]; j<bi[i+1]; j++) { 1635 PetscInt cowner,col = gcdest[bj[j]]; 1636 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1637 if (rowner == cowner) dnnz[i]++; 1638 else onnz[i]++; 1639 } 1640 } 1641 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1642 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1643 ierr = 
PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1644 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1645 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1646 1647 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1648 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1649 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1650 for (i=0; i<m; i++) { 1651 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1652 PetscInt j0,rowlen; 1653 rowlen = ai[i+1] - ai[i]; 1654 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1655 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1656 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1657 } 1658 rowlen = bi[i+1] - bi[i]; 1659 for (j0=j=0; j<rowlen; j0=j) { 1660 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1661 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1662 } 1663 } 1664 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1665 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1666 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1667 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1668 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1669 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1670 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1671 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1672 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1673 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1674 *B = Aperm; 1675 PetscFunctionReturn(0); 1676 } 1677 1678 #undef __FUNCT__ 1679 #define __FUNCT__ "MatGetGhosts_MPIAIJ" 1680 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt 
*ghosts[]) 1681 { 1682 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1683 PetscErrorCode ierr; 1684 1685 PetscFunctionBegin; 1686 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1687 if (ghosts) *ghosts = aij->garray; 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1693 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1694 { 1695 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1696 Mat A = mat->A,B = mat->B; 1697 PetscErrorCode ierr; 1698 PetscReal isend[5],irecv[5]; 1699 1700 PetscFunctionBegin; 1701 info->block_size = 1.0; 1702 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1703 1704 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1705 isend[3] = info->memory; isend[4] = info->mallocs; 1706 1707 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1708 1709 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1710 isend[3] += info->memory; isend[4] += info->mallocs; 1711 if (flag == MAT_LOCAL) { 1712 info->nz_used = isend[0]; 1713 info->nz_allocated = isend[1]; 1714 info->nz_unneeded = isend[2]; 1715 info->memory = isend[3]; 1716 info->mallocs = isend[4]; 1717 } else if (flag == MAT_GLOBAL_MAX) { 1718 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1719 1720 info->nz_used = irecv[0]; 1721 info->nz_allocated = irecv[1]; 1722 info->nz_unneeded = irecv[2]; 1723 info->memory = irecv[3]; 1724 info->mallocs = irecv[4]; 1725 } else if (flag == MAT_GLOBAL_SUM) { 1726 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1727 1728 info->nz_used = irecv[0]; 1729 info->nz_allocated = irecv[1]; 1730 info->nz_unneeded = irecv[2]; 1731 info->memory = irecv[3]; 1732 info->mallocs = irecv[4]; 1733 } 1734 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1735 info->fill_ratio_needed = 0; 1736 
info->factor_mallocs = 0; 1737 PetscFunctionReturn(0); 1738 } 1739 1740 #undef __FUNCT__ 1741 #define __FUNCT__ "MatSetOption_MPIAIJ" 1742 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1743 { 1744 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1745 PetscErrorCode ierr; 1746 1747 PetscFunctionBegin; 1748 switch (op) { 1749 case MAT_NEW_NONZERO_LOCATIONS: 1750 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1751 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1752 case MAT_KEEP_NONZERO_PATTERN: 1753 case MAT_NEW_NONZERO_LOCATION_ERR: 1754 case MAT_USE_INODES: 1755 case MAT_IGNORE_ZERO_ENTRIES: 1756 MatCheckPreallocated(A,1); 1757 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1758 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1759 break; 1760 case MAT_ROW_ORIENTED: 1761 MatCheckPreallocated(A,1); 1762 a->roworiented = flg; 1763 1764 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1765 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1766 break; 1767 case MAT_NEW_DIAGONALS: 1768 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1769 break; 1770 case MAT_IGNORE_OFF_PROC_ENTRIES: 1771 a->donotstash = flg; 1772 break; 1773 case MAT_SPD: 1774 A->spd_set = PETSC_TRUE; 1775 A->spd = flg; 1776 if (flg) { 1777 A->symmetric = PETSC_TRUE; 1778 A->structurally_symmetric = PETSC_TRUE; 1779 A->symmetric_set = PETSC_TRUE; 1780 A->structurally_symmetric_set = PETSC_TRUE; 1781 } 1782 break; 1783 case MAT_SYMMETRIC: 1784 MatCheckPreallocated(A,1); 1785 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1786 break; 1787 case MAT_STRUCTURALLY_SYMMETRIC: 1788 MatCheckPreallocated(A,1); 1789 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1790 break; 1791 case MAT_HERMITIAN: 1792 MatCheckPreallocated(A,1); 1793 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1794 break; 1795 case MAT_SYMMETRY_ETERNAL: 1796 MatCheckPreallocated(A,1); 1797 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1798 break; 1799 default: 1800 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option 
%d",op); 1801 } 1802 PetscFunctionReturn(0); 1803 } 1804 1805 #undef __FUNCT__ 1806 #define __FUNCT__ "MatGetRow_MPIAIJ" 1807 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1808 { 1809 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1810 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1811 PetscErrorCode ierr; 1812 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1813 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1814 PetscInt *cmap,*idx_p; 1815 1816 PetscFunctionBegin; 1817 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1818 mat->getrowactive = PETSC_TRUE; 1819 1820 if (!mat->rowvalues && (idx || v)) { 1821 /* 1822 allocate enough space to hold information from the longest row. 1823 */ 1824 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1825 PetscInt max = 1,tmp; 1826 for (i=0; i<matin->rmap->n; i++) { 1827 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1828 if (max < tmp) max = tmp; 1829 } 1830 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1831 } 1832 1833 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1834 lrow = row - rstart; 1835 1836 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1837 if (!v) {pvA = 0; pvB = 0;} 1838 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1839 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1840 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1841 nztot = nzA + nzB; 1842 1843 cmap = mat->garray; 1844 if (v || idx) { 1845 if (nztot) { 1846 /* Sort by increasing column numbers, assuming A and B already sorted */ 1847 PetscInt imark = -1; 1848 if (v) { 1849 *v = v_p = mat->rowvalues; 1850 for (i=0; i<nzB; i++) { 1851 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1852 else break; 1853 } 1854 imark = i; 1855 for (i=0; 
i<nzA; i++) v_p[imark+i] = vworkA[i]; 1856 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1857 } 1858 if (idx) { 1859 *idx = idx_p = mat->rowindices; 1860 if (imark > -1) { 1861 for (i=0; i<imark; i++) { 1862 idx_p[i] = cmap[cworkB[i]]; 1863 } 1864 } else { 1865 for (i=0; i<nzB; i++) { 1866 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1867 else break; 1868 } 1869 imark = i; 1870 } 1871 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1872 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1873 } 1874 } else { 1875 if (idx) *idx = 0; 1876 if (v) *v = 0; 1877 } 1878 } 1879 *nz = nztot; 1880 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1881 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1882 PetscFunctionReturn(0); 1883 } 1884 1885 #undef __FUNCT__ 1886 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1887 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1888 { 1889 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1890 1891 PetscFunctionBegin; 1892 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1893 aij->getrowactive = PETSC_FALSE; 1894 PetscFunctionReturn(0); 1895 } 1896 1897 #undef __FUNCT__ 1898 #define __FUNCT__ "MatNorm_MPIAIJ" 1899 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1900 { 1901 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1902 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1903 PetscErrorCode ierr; 1904 PetscInt i,j,cstart = mat->cmap->rstart; 1905 PetscReal sum = 0.0; 1906 MatScalar *v; 1907 1908 PetscFunctionBegin; 1909 if (aij->size == 1) { 1910 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1911 } else { 1912 if (type == NORM_FROBENIUS) { 1913 v = amat->a; 1914 for (i=0; i<amat->nz; i++) { 1915 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1916 } 1917 v = bmat->a; 1918 for (i=0; i<bmat->nz; i++) { 1919 
sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1920 } 1921 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1922 *norm = PetscSqrtReal(*norm); 1923 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1924 } else if (type == NORM_1) { /* max column norm */ 1925 PetscReal *tmp,*tmp2; 1926 PetscInt *jj,*garray = aij->garray; 1927 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1928 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1929 *norm = 0.0; 1930 v = amat->a; jj = amat->j; 1931 for (j=0; j<amat->nz; j++) { 1932 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1933 } 1934 v = bmat->a; jj = bmat->j; 1935 for (j=0; j<bmat->nz; j++) { 1936 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1937 } 1938 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1939 for (j=0; j<mat->cmap->N; j++) { 1940 if (tmp2[j] > *norm) *norm = tmp2[j]; 1941 } 1942 ierr = PetscFree(tmp);CHKERRQ(ierr); 1943 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1944 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1945 } else if (type == NORM_INFINITY) { /* max row norm */ 1946 PetscReal ntemp = 0.0; 1947 for (j=0; j<aij->A->rmap->n; j++) { 1948 v = amat->a + amat->i[j]; 1949 sum = 0.0; 1950 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1951 sum += PetscAbsScalar(*v); v++; 1952 } 1953 v = bmat->a + bmat->i[j]; 1954 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1955 sum += PetscAbsScalar(*v); v++; 1956 } 1957 if (sum > ntemp) ntemp = sum; 1958 } 1959 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1960 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1961 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1962 } 1963 PetscFunctionReturn(0); 1964 } 1965 1966 #undef __FUNCT__ 1967 #define __FUNCT__ "MatTranspose_MPIAIJ" 1968 PetscErrorCode 
MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1969 { 1970 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1971 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1972 PetscErrorCode ierr; 1973 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1974 PetscInt cstart = A->cmap->rstart,ncol; 1975 Mat B; 1976 MatScalar *array; 1977 1978 PetscFunctionBegin; 1979 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1980 1981 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1982 ai = Aloc->i; aj = Aloc->j; 1983 bi = Bloc->i; bj = Bloc->j; 1984 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1985 PetscInt *d_nnz,*g_nnz,*o_nnz; 1986 PetscSFNode *oloc; 1987 PETSC_UNUSED PetscSF sf; 1988 1989 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1990 /* compute d_nnz for preallocation */ 1991 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1992 for (i=0; i<ai[ma]; i++) { 1993 d_nnz[aj[i]]++; 1994 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1995 } 1996 /* compute local off-diagonal contributions */ 1997 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1998 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1999 /* map those to global */ 2000 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2001 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2002 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2003 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2004 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2005 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2006 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2007 2008 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2009 ierr = 
MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2010 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2011 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2012 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2013 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2014 } else { 2015 B = *matout; 2016 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2017 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2018 } 2019 2020 /* copy over the A part */ 2021 array = Aloc->a; 2022 row = A->rmap->rstart; 2023 for (i=0; i<ma; i++) { 2024 ncol = ai[i+1]-ai[i]; 2025 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2026 row++; 2027 array += ncol; aj += ncol; 2028 } 2029 aj = Aloc->j; 2030 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2031 2032 /* copy over the B part */ 2033 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2034 array = Bloc->a; 2035 row = A->rmap->rstart; 2036 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2037 cols_tmp = cols; 2038 for (i=0; i<mb; i++) { 2039 ncol = bi[i+1]-bi[i]; 2040 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2041 row++; 2042 array += ncol; cols_tmp += ncol; 2043 } 2044 ierr = PetscFree(cols);CHKERRQ(ierr); 2045 2046 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2047 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2048 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2049 *matout = B; 2050 } else { 2051 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2052 } 2053 PetscFunctionReturn(0); 2054 } 2055 2056 #undef __FUNCT__ 2057 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2058 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2059 { 2060 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2061 Mat a = aij->A,b = aij->B; 2062 PetscErrorCode ierr; 2063 PetscInt s1,s2,s3; 2064 
2065 PetscFunctionBegin; 2066 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2067 if (rr) { 2068 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2069 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2070 /* Overlap communication with computation. */ 2071 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2072 } 2073 if (ll) { 2074 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2075 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2076 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2077 } 2078 /* scale the diagonal block */ 2079 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2080 2081 if (rr) { 2082 /* Do a scatter end and then right scale the off-diagonal block */ 2083 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2084 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2085 } 2086 PetscFunctionReturn(0); 2087 } 2088 2089 #undef __FUNCT__ 2090 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2091 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2092 { 2093 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2094 PetscErrorCode ierr; 2095 2096 PetscFunctionBegin; 2097 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2098 PetscFunctionReturn(0); 2099 } 2100 2101 #undef __FUNCT__ 2102 #define __FUNCT__ "MatEqual_MPIAIJ" 2103 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2104 { 2105 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2106 Mat a,b,c,d; 2107 PetscBool flg; 2108 PetscErrorCode ierr; 2109 2110 PetscFunctionBegin; 2111 a = matA->A; b = matA->B; 2112 c = matB->A; d = matB->B; 2113 2114 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2115 if (flg) { 2116 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2117 } 2118 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2119 PetscFunctionReturn(0); 2120 } 2121 2122 
#undef __FUNCT__ 2123 #define __FUNCT__ "MatCopy_MPIAIJ" 2124 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2125 { 2126 PetscErrorCode ierr; 2127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2128 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2129 2130 PetscFunctionBegin; 2131 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2132 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2133 /* because of the column compression in the off-processor part of the matrix a->B, 2134 the number of columns in a->B and b->B may be different, hence we cannot call 2135 the MatCopy() directly on the two parts. If need be, we can provide a more 2136 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2137 then copying the submatrices */ 2138 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2139 } else { 2140 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2141 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2142 } 2143 PetscFunctionReturn(0); 2144 } 2145 2146 #undef __FUNCT__ 2147 #define __FUNCT__ "MatSetUp_MPIAIJ" 2148 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2149 { 2150 PetscErrorCode ierr; 2151 2152 PetscFunctionBegin; 2153 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2154 PetscFunctionReturn(0); 2155 } 2156 2157 /* 2158 Computes the number of nonzeros per row needed for preallocation when X and Y 2159 have different nonzero structure. 
2160 */ 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2163 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2164 { 2165 PetscInt i,j,k,nzx,nzy; 2166 2167 PetscFunctionBegin; 2168 /* Set the number of nonzeros in the new matrix */ 2169 for (i=0; i<m; i++) { 2170 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2171 nzx = xi[i+1] - xi[i]; 2172 nzy = yi[i+1] - yi[i]; 2173 nnz[i] = 0; 2174 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2175 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2176 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2177 nnz[i]++; 2178 } 2179 for (; k<nzy; k++) nnz[i]++; 2180 } 2181 PetscFunctionReturn(0); 2182 } 2183 2184 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2185 #undef __FUNCT__ 2186 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2187 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2188 { 2189 PetscErrorCode ierr; 2190 PetscInt m = Y->rmap->N; 2191 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2192 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2193 2194 PetscFunctionBegin; 2195 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2196 PetscFunctionReturn(0); 2197 } 2198 2199 #undef __FUNCT__ 2200 #define __FUNCT__ "MatAXPY_MPIAIJ" 2201 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2202 { 2203 PetscErrorCode ierr; 2204 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2205 PetscBLASInt bnz,one=1; 2206 Mat_SeqAIJ *x,*y; 2207 2208 PetscFunctionBegin; 2209 if (str == SAME_NONZERO_PATTERN) { 2210 PetscScalar alpha = a; 2211 x = (Mat_SeqAIJ*)xx->A->data; 2212 ierr = 
PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2213 y = (Mat_SeqAIJ*)yy->A->data; 2214 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2215 x = (Mat_SeqAIJ*)xx->B->data; 2216 y = (Mat_SeqAIJ*)yy->B->data; 2217 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2218 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2219 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2220 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2221 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2222 } else { 2223 Mat B; 2224 PetscInt *nnz_d,*nnz_o; 2225 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2226 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2227 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2228 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2229 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2230 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2231 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2232 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2233 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2234 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2235 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2236 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2237 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2238 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2239 } 2240 PetscFunctionReturn(0); 2241 } 2242 2243 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2244 2245 #undef __FUNCT__ 2246 #define __FUNCT__ "MatConjugate_MPIAIJ" 2247 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2248 { 2249 #if defined(PETSC_USE_COMPLEX) 2250 PetscErrorCode ierr; 2251 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2252 2253 PetscFunctionBegin; 2254 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2255 ierr = 
MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2256 #else 2257 PetscFunctionBegin; 2258 #endif 2259 PetscFunctionReturn(0); 2260 } 2261 2262 #undef __FUNCT__ 2263 #define __FUNCT__ "MatRealPart_MPIAIJ" 2264 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2265 { 2266 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2267 PetscErrorCode ierr; 2268 2269 PetscFunctionBegin; 2270 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2271 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2272 PetscFunctionReturn(0); 2273 } 2274 2275 #undef __FUNCT__ 2276 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2277 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2278 { 2279 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2280 PetscErrorCode ierr; 2281 2282 PetscFunctionBegin; 2283 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2284 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2285 PetscFunctionReturn(0); 2286 } 2287 2288 #undef __FUNCT__ 2289 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2290 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2291 { 2292 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2293 PetscErrorCode ierr; 2294 PetscInt i,*idxb = 0; 2295 PetscScalar *va,*vb; 2296 Vec vtmp; 2297 2298 PetscFunctionBegin; 2299 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2300 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2301 if (idx) { 2302 for (i=0; i<A->rmap->n; i++) { 2303 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2304 } 2305 } 2306 2307 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2308 if (idx) { 2309 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2310 } 2311 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2312 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2313 2314 for (i=0; i<A->rmap->n; i++) { 2315 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2316 va[i] = vb[i]; 2317 if (idx) idx[i] = a->garray[idxb[i]]; 2318 } 2319 } 2320 2321 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2322 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2323 ierr = PetscFree(idxb);CHKERRQ(ierr); 2324 ierr = 
VecDestroy(&vtmp);CHKERRQ(ierr); 2325 PetscFunctionReturn(0); 2326 } 2327 2328 #undef __FUNCT__ 2329 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2330 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2331 { 2332 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2333 PetscErrorCode ierr; 2334 PetscInt i,*idxb = 0; 2335 PetscScalar *va,*vb; 2336 Vec vtmp; 2337 2338 PetscFunctionBegin; 2339 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2340 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2341 if (idx) { 2342 for (i=0; i<A->cmap->n; i++) { 2343 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2344 } 2345 } 2346 2347 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2348 if (idx) { 2349 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2350 } 2351 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2352 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2353 2354 for (i=0; i<A->rmap->n; i++) { 2355 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2356 va[i] = vb[i]; 2357 if (idx) idx[i] = a->garray[idxb[i]]; 2358 } 2359 } 2360 2361 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2362 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2363 ierr = PetscFree(idxb);CHKERRQ(ierr); 2364 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2365 PetscFunctionReturn(0); 2366 } 2367 2368 #undef __FUNCT__ 2369 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2370 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2371 { 2372 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2373 PetscInt n = A->rmap->n; 2374 PetscInt cstart = A->cmap->rstart; 2375 PetscInt *cmap = mat->garray; 2376 PetscInt *diagIdx, *offdiagIdx; 2377 Vec diagV, offdiagV; 2378 PetscScalar *a, *diagA, *offdiagA; 2379 PetscInt r; 2380 PetscErrorCode ierr; 2381 2382 PetscFunctionBegin; 2383 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2384 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2385 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, 
&offdiagV);CHKERRQ(ierr); 2386 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2387 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2388 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2389 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2390 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2391 for (r = 0; r < n; ++r) { 2392 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2393 a[r] = diagA[r]; 2394 idx[r] = cstart + diagIdx[r]; 2395 } else { 2396 a[r] = offdiagA[r]; 2397 idx[r] = cmap[offdiagIdx[r]]; 2398 } 2399 } 2400 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2401 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2402 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2403 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2404 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2405 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2406 PetscFunctionReturn(0); 2407 } 2408 2409 #undef __FUNCT__ 2410 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2411 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2412 { 2413 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2414 PetscInt n = A->rmap->n; 2415 PetscInt cstart = A->cmap->rstart; 2416 PetscInt *cmap = mat->garray; 2417 PetscInt *diagIdx, *offdiagIdx; 2418 Vec diagV, offdiagV; 2419 PetscScalar *a, *diagA, *offdiagA; 2420 PetscInt r; 2421 PetscErrorCode ierr; 2422 2423 PetscFunctionBegin; 2424 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2425 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2426 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2427 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2428 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2429 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2430 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2431 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2432 for (r = 0; r < n; ++r) { 2433 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 
2434 a[r] = diagA[r]; 2435 idx[r] = cstart + diagIdx[r]; 2436 } else { 2437 a[r] = offdiagA[r]; 2438 idx[r] = cmap[offdiagIdx[r]]; 2439 } 2440 } 2441 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2442 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2443 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2444 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2445 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2446 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2447 PetscFunctionReturn(0); 2448 } 2449 2450 #undef __FUNCT__ 2451 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2452 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2453 { 2454 PetscErrorCode ierr; 2455 Mat *dummy; 2456 2457 PetscFunctionBegin; 2458 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2459 *newmat = *dummy; 2460 ierr = PetscFree(dummy);CHKERRQ(ierr); 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2466 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2467 { 2468 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2469 PetscErrorCode ierr; 2470 2471 PetscFunctionBegin; 2472 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2473 A->errortype = a->A->errortype; 2474 PetscFunctionReturn(0); 2475 } 2476 2477 #undef __FUNCT__ 2478 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2479 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2480 { 2481 PetscErrorCode ierr; 2482 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2483 2484 PetscFunctionBegin; 2485 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2486 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2487 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2488 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2489 PetscFunctionReturn(0); 2490 } 2491 2492 #undef __FUNCT__ 2493 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ" 2494 
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2495 { 2496 PetscFunctionBegin; 2497 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2498 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2499 PetscFunctionReturn(0); 2500 } 2501 2502 #undef __FUNCT__ 2503 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap" 2504 /*@ 2505 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2506 2507 Collective on Mat 2508 2509 Input Parameters: 2510 + A - the matrix 2511 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2512 2513 Level: advanced 2514 2515 @*/ 2516 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2517 { 2518 PetscErrorCode ierr; 2519 2520 PetscFunctionBegin; 2521 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2522 PetscFunctionReturn(0); 2523 } 2524 2525 #undef __FUNCT__ 2526 #define __FUNCT__ "MatSetFromOptions_MPIAIJ" 2527 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2528 { 2529 PetscErrorCode ierr; 2530 PetscBool sc = PETSC_FALSE,flg; 2531 2532 PetscFunctionBegin; 2533 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2534 ierr = PetscObjectOptionsBegin((PetscObject)A); 2535 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2536 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2537 if (flg) { 2538 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2539 } 2540 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2541 PetscFunctionReturn(0); 2542 } 2543 2544 #undef __FUNCT__ 2545 #define __FUNCT__ "MatShift_MPIAIJ" 2546 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2547 { 2548 PetscErrorCode 
ierr; 2549 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2550 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2551 2552 PetscFunctionBegin; 2553 if (!Y->preallocated) { 2554 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2555 } else if (!aij->nz) { 2556 PetscInt nonew = aij->nonew; 2557 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2558 aij->nonew = nonew; 2559 } 2560 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2561 PetscFunctionReturn(0); 2562 } 2563 2564 #undef __FUNCT__ 2565 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ" 2566 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2567 { 2568 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2569 PetscErrorCode ierr; 2570 2571 PetscFunctionBegin; 2572 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2573 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2574 if (d) { 2575 PetscInt rstart; 2576 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2577 *d += rstart; 2578 2579 } 2580 PetscFunctionReturn(0); 2581 } 2582 2583 2584 /* -------------------------------------------------------------------*/ 2585 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2586 MatGetRow_MPIAIJ, 2587 MatRestoreRow_MPIAIJ, 2588 MatMult_MPIAIJ, 2589 /* 4*/ MatMultAdd_MPIAIJ, 2590 MatMultTranspose_MPIAIJ, 2591 MatMultTransposeAdd_MPIAIJ, 2592 0, 2593 0, 2594 0, 2595 /*10*/ 0, 2596 0, 2597 0, 2598 MatSOR_MPIAIJ, 2599 MatTranspose_MPIAIJ, 2600 /*15*/ MatGetInfo_MPIAIJ, 2601 MatEqual_MPIAIJ, 2602 MatGetDiagonal_MPIAIJ, 2603 MatDiagonalScale_MPIAIJ, 2604 MatNorm_MPIAIJ, 2605 /*20*/ MatAssemblyBegin_MPIAIJ, 2606 MatAssemblyEnd_MPIAIJ, 2607 MatSetOption_MPIAIJ, 2608 MatZeroEntries_MPIAIJ, 2609 /*24*/ MatZeroRows_MPIAIJ, 2610 0, 2611 0, 2612 0, 2613 0, 2614 /*29*/ MatSetUp_MPIAIJ, 2615 0, 2616 0, 2617 0, 2618 0, 2619 /*34*/ MatDuplicate_MPIAIJ, 2620 0, 2621 0, 2622 0, 2623 0, 2624 /*39*/ MatAXPY_MPIAIJ, 2625 
MatGetSubMatrices_MPIAIJ, 2626 MatIncreaseOverlap_MPIAIJ, 2627 MatGetValues_MPIAIJ, 2628 MatCopy_MPIAIJ, 2629 /*44*/ MatGetRowMax_MPIAIJ, 2630 MatScale_MPIAIJ, 2631 MatShift_MPIAIJ, 2632 MatDiagonalSet_MPIAIJ, 2633 MatZeroRowsColumns_MPIAIJ, 2634 /*49*/ MatSetRandom_MPIAIJ, 2635 0, 2636 0, 2637 0, 2638 0, 2639 /*54*/ MatFDColoringCreate_MPIXAIJ, 2640 0, 2641 MatSetUnfactored_MPIAIJ, 2642 MatPermute_MPIAIJ, 2643 0, 2644 /*59*/ MatGetSubMatrix_MPIAIJ, 2645 MatDestroy_MPIAIJ, 2646 MatView_MPIAIJ, 2647 0, 2648 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2649 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2650 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2651 0, 2652 0, 2653 0, 2654 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2655 MatGetRowMinAbs_MPIAIJ, 2656 0, 2657 MatSetColoring_MPIAIJ, 2658 0, 2659 MatSetValuesAdifor_MPIAIJ, 2660 /*75*/ MatFDColoringApply_AIJ, 2661 MatSetFromOptions_MPIAIJ, 2662 0, 2663 0, 2664 MatFindZeroDiagonals_MPIAIJ, 2665 /*80*/ 0, 2666 0, 2667 0, 2668 /*83*/ MatLoad_MPIAIJ, 2669 0, 2670 0, 2671 0, 2672 0, 2673 0, 2674 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2675 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2676 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2677 MatPtAP_MPIAIJ_MPIAIJ, 2678 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2679 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2680 0, 2681 0, 2682 0, 2683 0, 2684 /*99*/ 0, 2685 0, 2686 0, 2687 MatConjugate_MPIAIJ, 2688 0, 2689 /*104*/MatSetValuesRow_MPIAIJ, 2690 MatRealPart_MPIAIJ, 2691 MatImaginaryPart_MPIAIJ, 2692 0, 2693 0, 2694 /*109*/0, 2695 0, 2696 MatGetRowMin_MPIAIJ, 2697 0, 2698 MatMissingDiagonal_MPIAIJ, 2699 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2700 0, 2701 MatGetGhosts_MPIAIJ, 2702 0, 2703 0, 2704 /*119*/0, 2705 0, 2706 0, 2707 0, 2708 MatGetMultiProcBlock_MPIAIJ, 2709 /*124*/MatFindNonzeroRows_MPIAIJ, 2710 MatGetColumnNorms_MPIAIJ, 2711 MatInvertBlockDiagonal_MPIAIJ, 2712 0, 2713 MatGetSubMatricesMPI_MPIAIJ, 2714 /*129*/0, 2715 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2716 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2717 
MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2718 0, 2719 /*134*/0, 2720 0, 2721 0, 2722 0, 2723 0, 2724 /*139*/0, 2725 0, 2726 0, 2727 MatFDColoringSetUp_MPIXAIJ, 2728 MatFindOffBlockDiagonalEntries_MPIAIJ, 2729 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2730 }; 2731 2732 /* ----------------------------------------------------------------------------------------*/ 2733 2734 #undef __FUNCT__ 2735 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2736 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2737 { 2738 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2739 PetscErrorCode ierr; 2740 2741 PetscFunctionBegin; 2742 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2743 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2744 PetscFunctionReturn(0); 2745 } 2746 2747 #undef __FUNCT__ 2748 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2749 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2750 { 2751 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2752 PetscErrorCode ierr; 2753 2754 PetscFunctionBegin; 2755 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2756 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2757 PetscFunctionReturn(0); 2758 } 2759 2760 #undef __FUNCT__ 2761 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2762 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2763 { 2764 Mat_MPIAIJ *b; 2765 PetscErrorCode ierr; 2766 2767 PetscFunctionBegin; 2768 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2769 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2770 b = (Mat_MPIAIJ*)B->data; 2771 2772 if (!B->preallocated) { 2773 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 2774 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2775 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2776 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2777 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2778 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2779 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2780 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2781 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2782 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2783 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2784 } 2785 2786 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2787 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2788 B->preallocated = PETSC_TRUE; 2789 PetscFunctionReturn(0); 2790 } 2791 2792 #undef __FUNCT__ 2793 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2794 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2795 { 2796 Mat mat; 2797 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2798 PetscErrorCode ierr; 2799 2800 PetscFunctionBegin; 2801 *newmat = 0; 2802 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2803 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2804 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2805 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2806 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2807 a = (Mat_MPIAIJ*)mat->data; 2808 2809 mat->factortype = matin->factortype; 2810 mat->assembled = PETSC_TRUE; 2811 mat->insertmode = NOT_SET_VALUES; 2812 mat->preallocated = PETSC_TRUE; 2813 2814 a->size = oldmat->size; 2815 a->rank = oldmat->rank; 2816 a->donotstash = oldmat->donotstash; 2817 a->roworiented = oldmat->roworiented; 2818 a->rowindices 
= 0; 2819 a->rowvalues = 0; 2820 a->getrowactive = PETSC_FALSE; 2821 2822 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2823 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2824 2825 if (oldmat->colmap) { 2826 #if defined(PETSC_USE_CTABLE) 2827 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2828 #else 2829 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2830 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2831 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2832 #endif 2833 } else a->colmap = 0; 2834 if (oldmat->garray) { 2835 PetscInt len; 2836 len = oldmat->B->cmap->n; 2837 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2838 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2839 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2840 } else a->garray = 0; 2841 2842 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2843 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2844 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2845 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2846 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2847 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2848 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2849 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2850 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2851 *newmat = mat; 2852 PetscFunctionReturn(0); 2853 } 2854 2855 2856 2857 #undef __FUNCT__ 2858 #define __FUNCT__ "MatLoad_MPIAIJ" 2859 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2860 { 2861 PetscScalar *vals,*svals; 2862 MPI_Comm comm; 2863 
PetscErrorCode ierr; 2864 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2865 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2866 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2867 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2868 PetscInt cend,cstart,n,*rowners; 2869 int fd; 2870 PetscInt bs = newMat->rmap->bs; 2871 2872 PetscFunctionBegin; 2873 /* force binary viewer to load .info file if it has not yet done so */ 2874 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2875 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2876 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2877 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2878 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2879 if (!rank) { 2880 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2881 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2882 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MPIAIJ"); 2883 } 2884 2885 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr); 2886 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2887 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2888 if (bs < 0) bs = 1; 2889 2890 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2891 M = header[1]; N = header[2]; 2892 2893 /* If global sizes are set, check if they are consistent with that given in the file */ 2894 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2895 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and 
input matrix has (%D)",newMat->cmap->N,N); 2896 2897 /* determine ownership of all (block) rows */ 2898 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2899 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2900 else m = newMat->rmap->n; /* Set by user */ 2901 2902 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2903 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2904 2905 /* First process needs enough room for process with most rows */ 2906 if (!rank) { 2907 mmax = rowners[1]; 2908 for (i=2; i<=size; i++) { 2909 mmax = PetscMax(mmax, rowners[i]); 2910 } 2911 } else mmax = -1; /* unused, but compilers complain */ 2912 2913 rowners[0] = 0; 2914 for (i=2; i<=size; i++) { 2915 rowners[i] += rowners[i-1]; 2916 } 2917 rstart = rowners[rank]; 2918 rend = rowners[rank+1]; 2919 2920 /* distribute row lengths to all processors */ 2921 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2922 if (!rank) { 2923 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2924 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2925 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2926 for (j=0; j<m; j++) { 2927 procsnz[0] += ourlens[j]; 2928 } 2929 for (i=1; i<size; i++) { 2930 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2931 /* calculate the number of nonzeros on each processor */ 2932 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2933 procsnz[i] += rowlengths[j]; 2934 } 2935 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2936 } 2937 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2938 } else { 2939 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2940 } 2941 2942 if (!rank) { 2943 /* determine max buffer needed and allocate it */ 2944 maxnz = 0; 2945 for (i=0; i<size; i++) { 2946 maxnz = PetscMax(maxnz,procsnz[i]); 2947 } 
2948 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2949 2950 /* read in my part of the matrix column indices */ 2951 nz = procsnz[0]; 2952 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2953 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2954 2955 /* read in every one elses and ship off */ 2956 for (i=1; i<size; i++) { 2957 nz = procsnz[i]; 2958 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2959 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2960 } 2961 ierr = PetscFree(cols);CHKERRQ(ierr); 2962 } else { 2963 /* determine buffer space needed for message */ 2964 nz = 0; 2965 for (i=0; i<m; i++) { 2966 nz += ourlens[i]; 2967 } 2968 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2969 2970 /* receive message of column indices*/ 2971 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2972 } 2973 2974 /* determine column ownership if matrix is not square */ 2975 if (N != M) { 2976 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2977 else n = newMat->cmap->n; 2978 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2979 cstart = cend - n; 2980 } else { 2981 cstart = rstart; 2982 cend = rend; 2983 n = cend - cstart; 2984 } 2985 2986 /* loop over local rows, determining number of off diagonal entries */ 2987 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2988 jj = 0; 2989 for (i=0; i<m; i++) { 2990 for (j=0; j<ourlens[i]; j++) { 2991 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2992 jj++; 2993 } 2994 } 2995 2996 for (i=0; i<m; i++) { 2997 ourlens[i] -= offlens[i]; 2998 } 2999 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3000 3001 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3002 3003 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3004 3005 for (i=0; i<m; i++) { 3006 ourlens[i] += offlens[i]; 3007 } 3008 3009 if (!rank) { 3010 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3011 3012 /* read in my part 
of the matrix numerical values */ 3013 nz = procsnz[0]; 3014 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3015 3016 /* insert into matrix */ 3017 jj = rstart; 3018 smycols = mycols; 3019 svals = vals; 3020 for (i=0; i<m; i++) { 3021 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3022 smycols += ourlens[i]; 3023 svals += ourlens[i]; 3024 jj++; 3025 } 3026 3027 /* read in other processors and ship out */ 3028 for (i=1; i<size; i++) { 3029 nz = procsnz[i]; 3030 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3031 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3032 } 3033 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3034 } else { 3035 /* receive numeric values */ 3036 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3037 3038 /* receive message of values*/ 3039 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3040 3041 /* insert into matrix */ 3042 jj = rstart; 3043 smycols = mycols; 3044 svals = vals; 3045 for (i=0; i<m; i++) { 3046 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3047 smycols += ourlens[i]; 3048 svals += ourlens[i]; 3049 jj++; 3050 } 3051 } 3052 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3053 ierr = PetscFree(vals);CHKERRQ(ierr); 3054 ierr = PetscFree(mycols);CHKERRQ(ierr); 3055 ierr = PetscFree(rowners);CHKERRQ(ierr); 3056 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3057 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3058 PetscFunctionReturn(0); 3059 } 3060 3061 #undef __FUNCT__ 3062 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3063 /* TODO: Not scalable because of ISAllGather() unless getting all columns. 
*/ 3064 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3065 { 3066 PetscErrorCode ierr; 3067 IS iscol_local; 3068 PetscInt csize; 3069 3070 PetscFunctionBegin; 3071 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3072 if (call == MAT_REUSE_MATRIX) { 3073 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3074 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3075 } else { 3076 /* check if we are grabbing all columns*/ 3077 PetscBool isstride; 3078 PetscMPIInt lisstride = 0,gisstride; 3079 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3080 if (isstride) { 3081 PetscInt start,len,mstart,mlen; 3082 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3083 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3084 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3085 if (mstart == start && mlen-mstart == len) lisstride = 1; 3086 } 3087 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3088 if (gisstride) { 3089 PetscInt N; 3090 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3091 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3092 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3093 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3094 } else { 3095 PetscInt cbs; 3096 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3097 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3098 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3099 } 3100 } 3101 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3102 if (call == MAT_INITIAL_MATRIX) { 3103 ierr = 
PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3104 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3105 } 3106 PetscFunctionReturn(0); 3107 } 3108 3109 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3110 #undef __FUNCT__ 3111 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3112 /* 3113 Not great since it makes two copies of the submatrix, first an SeqAIJ 3114 in local and then by concatenating the local matrices the end result. 3115 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3116 3117 Note: This requires a sequential iscol with all indices. 3118 */ 3119 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3120 { 3121 PetscErrorCode ierr; 3122 PetscMPIInt rank,size; 3123 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3124 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3125 PetscBool allcolumns, colflag; 3126 Mat M,Mreuse; 3127 MatScalar *vwork,*aa; 3128 MPI_Comm comm; 3129 Mat_SeqAIJ *aij; 3130 3131 PetscFunctionBegin; 3132 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3133 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3134 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3135 3136 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3137 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3138 if (colflag && ncol == mat->cmap->N) { 3139 allcolumns = PETSC_TRUE; 3140 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3141 } else { 3142 allcolumns = PETSC_FALSE; 3143 } 3144 if (call == MAT_REUSE_MATRIX) { 3145 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3146 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3147 ierr = 
MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3148 } else { 3149 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3150 } 3151 3152 /* 3153 m - number of local rows 3154 n - number of columns (same on all processors) 3155 rstart - first row in new global matrix generated 3156 */ 3157 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3158 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3159 if (call == MAT_INITIAL_MATRIX) { 3160 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3161 ii = aij->i; 3162 jj = aij->j; 3163 3164 /* 3165 Determine the number of non-zeros in the diagonal and off-diagonal 3166 portions of the matrix in order to do correct preallocation 3167 */ 3168 3169 /* first get start and end of "diagonal" columns */ 3170 if (csize == PETSC_DECIDE) { 3171 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3172 if (mglobal == n) { /* square matrix */ 3173 nlocal = m; 3174 } else { 3175 nlocal = n/size + ((n % size) > rank); 3176 } 3177 } else { 3178 nlocal = csize; 3179 } 3180 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3181 rstart = rend - nlocal; 3182 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3183 3184 /* next, compute all the lengths */ 3185 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3186 olens = dlens + m; 3187 for (i=0; i<m; i++) { 3188 jend = ii[i+1] - ii[i]; 3189 olen = 0; 3190 dlen = 0; 3191 for (j=0; j<jend; j++) { 3192 if (*jj < rstart || *jj >= rend) olen++; 3193 else dlen++; 3194 jj++; 3195 } 3196 olens[i] = olen; 3197 dlens[i] = dlen; 3198 } 3199 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3200 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3201 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3202 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3203 ierr = 
MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3204 ierr = PetscFree(dlens);CHKERRQ(ierr); 3205 } else { 3206 PetscInt ml,nl; 3207 3208 M = *newmat; 3209 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3210 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3211 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3212 /* 3213 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3214 rather than the slower MatSetValues(). 3215 */ 3216 M->was_assembled = PETSC_TRUE; 3217 M->assembled = PETSC_FALSE; 3218 } 3219 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3220 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3221 ii = aij->i; 3222 jj = aij->j; 3223 aa = aij->a; 3224 for (i=0; i<m; i++) { 3225 row = rstart + i; 3226 nz = ii[i+1] - ii[i]; 3227 cwork = jj; jj += nz; 3228 vwork = aa; aa += nz; 3229 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3230 } 3231 3232 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3233 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3234 *newmat = M; 3235 3236 /* save submatrix used in processor for next request */ 3237 if (call == MAT_INITIAL_MATRIX) { 3238 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3239 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3240 } 3241 PetscFunctionReturn(0); 3242 } 3243 3244 #undef __FUNCT__ 3245 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3246 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3247 { 3248 PetscInt m,cstart, cend,j,nnz,i,d; 3249 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3250 const PetscInt *JJ; 3251 PetscScalar *values; 3252 PetscErrorCode ierr; 3253 3254 PetscFunctionBegin; 3255 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3256 3257 ierr = 
PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3258 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3259 m = B->rmap->n; 3260 cstart = B->cmap->rstart; 3261 cend = B->cmap->rend; 3262 rstart = B->rmap->rstart; 3263 3264 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3265 3266 #if defined(PETSC_USE_DEBUGGING) 3267 for (i=0; i<m; i++) { 3268 nnz = Ii[i+1]- Ii[i]; 3269 JJ = J + Ii[i]; 3270 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3271 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3272 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3273 } 3274 #endif 3275 3276 for (i=0; i<m; i++) { 3277 nnz = Ii[i+1]- Ii[i]; 3278 JJ = J + Ii[i]; 3279 nnz_max = PetscMax(nnz_max,nnz); 3280 d = 0; 3281 for (j=0; j<nnz; j++) { 3282 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3283 } 3284 d_nnz[i] = d; 3285 o_nnz[i] = nnz - d; 3286 } 3287 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3288 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3289 3290 if (v) values = (PetscScalar*)v; 3291 else { 3292 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3293 } 3294 3295 for (i=0; i<m; i++) { 3296 ii = i + rstart; 3297 nnz = Ii[i+1]- Ii[i]; 3298 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3299 } 3300 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3301 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3302 3303 if (!v) { 3304 ierr = PetscFree(values);CHKERRQ(ierr); 3305 } 3306 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3307 PetscFunctionReturn(0); 3308 } 3309 3310 #undef __FUNCT__ 3311 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3312 /*@ 3313 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3314 (the default parallel PETSc format). 3315 3316 Collective on MPI_Comm 3317 3318 Input Parameters: 3319 + B - the matrix 3320 . i - the indices into j for the start of each local row (starts with zero) 3321 . j - the column indices for each local row (starts with zero) 3322 - v - optional values in the matrix 3323 3324 Level: developer 3325 3326 Notes: 3327 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3328 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3329 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3330 3331 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3332 3333 The format which is used for the sparse matrix input, is equivalent to a 3334 row-major ordering.. 
i.e for the following matrix, the input data expected is 3335 as shown 3336 3337 $ 1 0 0 3338 $ 2 0 3 P0 3339 $ ------- 3340 $ 4 5 6 P1 3341 $ 3342 $ Process0 [P0]: rows_owned=[0,1] 3343 $ i = {0,1,3} [size = nrow+1 = 2+1] 3344 $ j = {0,0,2} [size = 3] 3345 $ v = {1,2,3} [size = 3] 3346 $ 3347 $ Process1 [P1]: rows_owned=[2] 3348 $ i = {0,3} [size = nrow+1 = 1+1] 3349 $ j = {0,1,2} [size = 3] 3350 $ v = {4,5,6} [size = 3] 3351 3352 .keywords: matrix, aij, compressed row, sparse, parallel 3353 3354 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3355 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3356 @*/ 3357 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3358 { 3359 PetscErrorCode ierr; 3360 3361 PetscFunctionBegin; 3362 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3363 PetscFunctionReturn(0); 3364 } 3365 3366 #undef __FUNCT__ 3367 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3368 /*@C 3369 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3370 (the default parallel PETSc format). For good matrix assembly performance 3371 the user should preallocate the matrix storage by setting the parameters 3372 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3373 performance can be increased by more than a factor of 50. 3374 3375 Collective on MPI_Comm 3376 3377 Input Parameters: 3378 + B - the matrix 3379 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3380 (same value is used for all local rows) 3381 . 
d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.
In the common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
3491 3492 Level: intermediate 3493 3494 .keywords: matrix, aij, compressed row, sparse, parallel 3495 3496 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3497 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3498 @*/ 3499 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3500 { 3501 PetscErrorCode ierr; 3502 3503 PetscFunctionBegin; 3504 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3505 PetscValidType(B,1); 3506 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3507 PetscFunctionReturn(0); 3508 } 3509 3510 #undef __FUNCT__ 3511 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3512 /*@ 3513 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3514 CSR format the local rows. 3515 3516 Collective on MPI_Comm 3517 3518 Input Parameters: 3519 + comm - MPI communicator 3520 . m - number of local rows (Cannot be PETSC_DECIDE) 3521 . n - This value should be the same as the local size used in creating the 3522 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3523 calculated if N is given) For square matrices n is almost always m. 3524 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3525 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3526 . i - row indices 3527 . j - column indices 3528 - a - matrix values 3529 3530 Output Parameter: 3531 . mat - the matrix 3532 3533 Level: intermediate 3534 3535 Notes: 3536 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3537 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3538 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
3539 3540 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3541 3542 The format which is used for the sparse matrix input, is equivalent to a 3543 row-major ordering.. i.e for the following matrix, the input data expected is 3544 as shown 3545 3546 $ 1 0 0 3547 $ 2 0 3 P0 3548 $ ------- 3549 $ 4 5 6 P1 3550 $ 3551 $ Process0 [P0]: rows_owned=[0,1] 3552 $ i = {0,1,3} [size = nrow+1 = 2+1] 3553 $ j = {0,0,2} [size = 3] 3554 $ v = {1,2,3} [size = 3] 3555 $ 3556 $ Process1 [P1]: rows_owned=[2] 3557 $ i = {0,3} [size = nrow+1 = 1+1] 3558 $ j = {0,1,2} [size = 3] 3559 $ v = {4,5,6} [size = 3] 3560 3561 .keywords: matrix, aij, compressed row, sparse, parallel 3562 3563 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3564 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3565 @*/ 3566 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3567 { 3568 PetscErrorCode ierr; 3569 3570 PetscFunctionBegin; 3571 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3572 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3573 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3574 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3575 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3576 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3577 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3578 PetscFunctionReturn(0); 3579 } 3580 3581 #undef __FUNCT__ 3582 #define __FUNCT__ "MatCreateAIJ" 3583 /*@C 3584 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3585 (the default parallel PETSc format). 
For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   Output Parameter:
.
A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store 3737 34 values. 3738 3739 When d_nnz, o_nnz parameters are specified, the storage is specified 3740 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3741 In the above case the values for d_nnz,o_nnz are 3742 .vb 3743 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3744 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3745 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3746 .ve 3747 Here the space allocated is sum of all the above values i.e 34, and 3748 hence pre-allocation is perfect. 3749 3750 Level: intermediate 3751 3752 .keywords: matrix, aij, compressed row, sparse, parallel 3753 3754 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3755 MPIAIJ, MatCreateMPIAIJWithArrays() 3756 @*/ 3757 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3758 { 3759 PetscErrorCode ierr; 3760 PetscMPIInt size; 3761 3762 PetscFunctionBegin; 3763 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3764 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3765 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3766 if (size > 1) { 3767 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3768 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3769 } else { 3770 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3771 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3772 } 3773 PetscFunctionReturn(0); 3774 } 3775 3776 #undef __FUNCT__ 3777 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3778 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3779 { 3780 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3781 PetscBool flg; 3782 PetscErrorCode ierr; 3783 3784 PetscFunctionBegin; 3785 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 3786 if (!flg) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MPIAIJ matrix as input"); 3787 if (Ad) *Ad = a->A; 3788 if (Ao) *Ao = a->B; 3789 if (colmap) *colmap = a->garray; 3790 PetscFunctionReturn(0); 3791 } 3792 3793 #undef __FUNCT__ 3794 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3795 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3796 { 3797 PetscErrorCode ierr; 3798 PetscInt i; 3799 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3800 3801 PetscFunctionBegin; 3802 if (coloring->ctype == IS_COLORING_GLOBAL) { 3803 ISColoringValue *allcolors,*colors; 3804 ISColoring ocoloring; 3805 3806 /* set coloring for diagonal portion */ 3807 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3808 3809 /* set coloring for off-diagonal portion */ 3810 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 3811 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3812 for (i=0; i<a->B->cmap->n; i++) { 3813 colors[i] = allcolors[a->garray[i]]; 3814 } 3815 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3816 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3817 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3818 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3819 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3820 ISColoringValue *colors; 3821 PetscInt *larray; 3822 ISColoring ocoloring; 3823 3824 /* set coloring for diagonal portion */ 3825 ierr = PetscMalloc1(a->A->cmap->n+1,&larray);CHKERRQ(ierr); 3826 for (i=0; i<a->A->cmap->n; i++) { 3827 larray[i] = i + A->cmap->rstart; 3828 } 3829 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 3830 ierr = PetscMalloc1(a->A->cmap->n+1,&colors);CHKERRQ(ierr); 3831 for (i=0; i<a->A->cmap->n; i++) { 3832 colors[i] = coloring->colors[larray[i]]; 3833 } 3834 ierr = 
PetscFree(larray);CHKERRQ(ierr); 3835 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3836 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3837 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3838 3839 /* set coloring for off-diagonal portion */ 3840 ierr = PetscMalloc1(a->B->cmap->n+1,&larray);CHKERRQ(ierr); 3841 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 3842 ierr = PetscMalloc1(a->B->cmap->n+1,&colors);CHKERRQ(ierr); 3843 for (i=0; i<a->B->cmap->n; i++) { 3844 colors[i] = coloring->colors[larray[i]]; 3845 } 3846 ierr = PetscFree(larray);CHKERRQ(ierr); 3847 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,PETSC_OWN_POINTER,&ocoloring);CHKERRQ(ierr); 3848 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3849 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3850 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3851 PetscFunctionReturn(0); 3852 } 3853 3854 #undef __FUNCT__ 3855 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3856 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3857 { 3858 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3859 PetscErrorCode ierr; 3860 3861 PetscFunctionBegin; 3862 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3863 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3864 PetscFunctionReturn(0); 3865 } 3866 3867 #undef __FUNCT__ 3868 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3869 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3870 { 3871 PetscErrorCode ierr; 3872 PetscInt m,N,i,rstart,nnz,Ii; 3873 PetscInt *indx; 3874 PetscScalar *values; 3875 3876 PetscFunctionBegin; 3877 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3878 if (scall == MAT_INITIAL_MATRIX) { 
/* symbolic phase */ 3879 PetscInt *dnz,*onz,sum,bs,cbs; 3880 3881 if (n == PETSC_DECIDE) { 3882 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3883 } 3884 /* Check sum(n) = N */ 3885 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3886 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3887 3888 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3889 rstart -= m; 3890 3891 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3892 for (i=0; i<m; i++) { 3893 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3894 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3895 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3896 } 3897 3898 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3899 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3900 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3901 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3902 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3903 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3904 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3905 } 3906 3907 /* numeric phase */ 3908 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3909 for (i=0; i<m; i++) { 3910 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3911 Ii = i + rstart; 3912 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3913 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3914 } 3915 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3916 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3917 PetscFunctionReturn(0); 3918 } 3919 3920 #undef __FUNCT__ 3921 #define __FUNCT__ "MatFileSplit" 3922 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3923 { 3924 PetscErrorCode ierr; 3925 PetscMPIInt rank; 3926 
PetscInt m,N,i,rstart,nnz; 3927 size_t len; 3928 const PetscInt *indx; 3929 PetscViewer out; 3930 char *name; 3931 Mat B; 3932 const PetscScalar *values; 3933 3934 PetscFunctionBegin; 3935 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3936 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3937 /* Should this be the type of the diagonal block of A? */ 3938 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3939 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3940 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3941 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3942 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3943 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3944 for (i=0; i<m; i++) { 3945 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3946 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3947 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3948 } 3949 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3950 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3951 3952 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3953 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3954 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3955 sprintf(name,"%s.%d",outfile,rank); 3956 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3957 ierr = PetscFree(name);CHKERRQ(ierr); 3958 ierr = MatView(B,out);CHKERRQ(ierr); 3959 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3960 ierr = MatDestroy(&B);CHKERRQ(ierr); 3961 PetscFunctionReturn(0); 3962 } 3963 3964 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3965 #undef __FUNCT__ 3966 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3967 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3968 { 3969 PetscErrorCode ierr; 3970 Mat_Merge_SeqsToMPI *merge; 3971 PetscContainer container; 3972 3973 PetscFunctionBegin; 3974 ierr = 
PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3975 if (container) { 3976 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3977 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3978 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3979 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3980 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3981 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3982 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3983 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3984 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3985 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3986 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3987 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3988 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3989 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3990 ierr = PetscFree(merge);CHKERRQ(ierr); 3991 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3992 } 3993 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3994 PetscFunctionReturn(0); 3995 } 3996 3997 #include <../src/mat/utils/freespace.h> 3998 #include <petscbt.h> 3999 4000 #undef __FUNCT__ 4001 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4002 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4003 { 4004 PetscErrorCode ierr; 4005 MPI_Comm comm; 4006 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4007 PetscMPIInt size,rank,taga,*len_s; 4008 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4009 PetscInt proc,m; 4010 PetscInt **buf_ri,**buf_rj; 4011 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4012 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4013 MPI_Request *s_waits,*r_waits; 4014 MPI_Status *status; 4015 MatScalar *aa=a->a; 4016 MatScalar **abuf_r,*ba_i; 4017 Mat_Merge_SeqsToMPI *merge; 4018 PetscContainer container; 4019 4020 PetscFunctionBegin; 4021 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4022 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4023 4024 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4025 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4026 4027 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4028 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4029 4030 bi = merge->bi; 4031 bj = merge->bj; 4032 buf_ri = merge->buf_ri; 4033 buf_rj = merge->buf_rj; 4034 4035 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4036 owners = merge->rowmap->range; 4037 len_s = merge->len_s; 4038 4039 /* send and recv matrix values */ 4040 /*-----------------------------*/ 4041 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4042 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4043 4044 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4045 for (proc=0,k=0; proc<size; proc++) { 4046 if (!len_s[proc]) continue; 4047 i = owners[proc]; 4048 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4049 k++; 4050 } 4051 4052 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4053 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4054 ierr = PetscFree(status);CHKERRQ(ierr); 4055 4056 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4057 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4058 4059 /* insert mat values of mpimat */ 4060 /*----------------------------*/ 4061 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4062 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4063 4064 for (k=0; k<merge->nrecv; k++) { 4065 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4066 nrows = *(buf_ri_k[k]); 4067 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4068 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to 
the next i-structure of k-th recved i-structure */ 4069 } 4070 4071 /* set values of ba */ 4072 m = merge->rowmap->n; 4073 for (i=0; i<m; i++) { 4074 arow = owners[rank] + i; 4075 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4076 bnzi = bi[i+1] - bi[i]; 4077 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4078 4079 /* add local non-zero vals of this proc's seqmat into ba */ 4080 anzi = ai[arow+1] - ai[arow]; 4081 aj = a->j + ai[arow]; 4082 aa = a->a + ai[arow]; 4083 nextaj = 0; 4084 for (j=0; nextaj<anzi; j++) { 4085 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4086 ba_i[j] += aa[nextaj++]; 4087 } 4088 } 4089 4090 /* add received vals into ba */ 4091 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4092 /* i-th row */ 4093 if (i == *nextrow[k]) { 4094 anzi = *(nextai[k]+1) - *nextai[k]; 4095 aj = buf_rj[k] + *(nextai[k]); 4096 aa = abuf_r[k] + *(nextai[k]); 4097 nextaj = 0; 4098 for (j=0; nextaj<anzi; j++) { 4099 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4100 ba_i[j] += aa[nextaj++]; 4101 } 4102 } 4103 nextrow[k]++; nextai[k]++; 4104 } 4105 } 4106 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4107 } 4108 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4109 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4110 4111 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4112 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4113 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4114 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4115 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4116 PetscFunctionReturn(0); 4117 } 4118 4119 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4120 4121 #undef __FUNCT__ 4122 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4123 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4124 { 4125 PetscErrorCode ierr; 4126 Mat B_mpi; 4127 Mat_SeqAIJ 
*a=(Mat_SeqAIJ*)seqmat->data; 4128 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4129 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4130 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4131 PetscInt len,proc,*dnz,*onz,bs,cbs; 4132 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4133 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4134 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4135 MPI_Status *status; 4136 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4137 PetscBT lnkbt; 4138 Mat_Merge_SeqsToMPI *merge; 4139 PetscContainer container; 4140 4141 PetscFunctionBegin; 4142 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4143 4144 /* make sure it is a PETSc comm */ 4145 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4146 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4147 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4148 4149 ierr = PetscNew(&merge);CHKERRQ(ierr); 4150 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4151 4152 /* determine row ownership */ 4153 /*---------------------------------------------------------*/ 4154 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4155 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4156 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4157 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4158 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4159 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4160 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4161 4162 m = merge->rowmap->n; 4163 owners = merge->rowmap->range; 4164 4165 /* determine the number of messages to send, their lengths */ 4166 /*---------------------------------------------------------*/ 4167 len_s = merge->len_s; 4168 4169 len = 0; /* length of buf_si[] */ 4170 merge->nsend = 0; 4171 for (proc=0; proc<size; proc++) { 4172 len_si[proc] = 0; 4173 if (proc == rank) { 4174 len_s[proc] = 0; 4175 } 
else { 4176 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4177 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4178 } 4179 if (len_s[proc]) { 4180 merge->nsend++; 4181 nrows = 0; 4182 for (i=owners[proc]; i<owners[proc+1]; i++) { 4183 if (ai[i+1] > ai[i]) nrows++; 4184 } 4185 len_si[proc] = 2*(nrows+1); 4186 len += len_si[proc]; 4187 } 4188 } 4189 4190 /* determine the number and length of messages to receive for ij-structure */ 4191 /*-------------------------------------------------------------------------*/ 4192 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4193 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4194 4195 /* post the Irecv of j-structure */ 4196 /*-------------------------------*/ 4197 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4198 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4199 4200 /* post the Isend of j-structure */ 4201 /*--------------------------------*/ 4202 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4203 4204 for (proc=0, k=0; proc<size; proc++) { 4205 if (!len_s[proc]) continue; 4206 i = owners[proc]; 4207 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4208 k++; 4209 } 4210 4211 /* receives and sends of j-structure are complete */ 4212 /*------------------------------------------------*/ 4213 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4214 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4215 4216 /* send and recv i-structure */ 4217 /*---------------------------*/ 4218 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4219 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4220 4221 ierr = 
PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4222 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4223 for (proc=0,k=0; proc<size; proc++) { 4224 if (!len_s[proc]) continue; 4225 /* form outgoing message for i-structure: 4226 buf_si[0]: nrows to be sent 4227 [1:nrows]: row index (global) 4228 [nrows+1:2*nrows+1]: i-structure index 4229 */ 4230 /*-------------------------------------------*/ 4231 nrows = len_si[proc]/2 - 1; 4232 buf_si_i = buf_si + nrows+1; 4233 buf_si[0] = nrows; 4234 buf_si_i[0] = 0; 4235 nrows = 0; 4236 for (i=owners[proc]; i<owners[proc+1]; i++) { 4237 anzi = ai[i+1] - ai[i]; 4238 if (anzi) { 4239 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4240 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4241 nrows++; 4242 } 4243 } 4244 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4245 k++; 4246 buf_si += len_si[proc]; 4247 } 4248 4249 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4250 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4251 4252 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4253 for (i=0; i<merge->nrecv; i++) { 4254 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4255 } 4256 4257 ierr = PetscFree(len_si);CHKERRQ(ierr); 4258 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4259 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4260 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4261 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4262 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4263 ierr = PetscFree(status);CHKERRQ(ierr); 4264 4265 /* compute a local seq matrix in each processor */ 4266 /*----------------------------------------------*/ 4267 /* allocate bi array and free space for accumulating nonzero column info */ 4268 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4269 bi[0] = 0; 4270 4271 
/* create and initialize a linked list */ 4272 nlnk = N+1; 4273 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4274 4275 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4276 len = ai[owners[rank+1]] - ai[owners[rank]]; 4277 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4278 4279 current_space = free_space; 4280 4281 /* determine symbolic info for each local row */ 4282 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4283 4284 for (k=0; k<merge->nrecv; k++) { 4285 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4286 nrows = *buf_ri_k[k]; 4287 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4288 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4289 } 4290 4291 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4292 len = 0; 4293 for (i=0; i<m; i++) { 4294 bnzi = 0; 4295 /* add local non-zero cols of this proc's seqmat into lnk */ 4296 arow = owners[rank] + i; 4297 anzi = ai[arow+1] - ai[arow]; 4298 aj = a->j + ai[arow]; 4299 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4300 bnzi += nlnk; 4301 /* add received col data into lnk */ 4302 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4303 if (i == *nextrow[k]) { /* i-th row */ 4304 anzi = *(nextai[k]+1) - *nextai[k]; 4305 aj = buf_rj[k] + *nextai[k]; 4306 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4307 bnzi += nlnk; 4308 nextrow[k]++; nextai[k]++; 4309 } 4310 } 4311 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4312 4313 /* if free space is not available, make more free space */ 4314 if (current_space->local_remaining<bnzi) { 4315 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4316 nspacedouble++; 4317 } 4318 /* copy data into free space, then initialize lnk */ 4319 ierr = 
PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4320 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4321 4322 current_space->array += bnzi; 4323 current_space->local_used += bnzi; 4324 current_space->local_remaining -= bnzi; 4325 4326 bi[i+1] = bi[i] + bnzi; 4327 } 4328 4329 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4330 4331 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4332 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4333 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4334 4335 /* create symbolic parallel matrix B_mpi */ 4336 /*---------------------------------------*/ 4337 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4338 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4339 if (n==PETSC_DECIDE) { 4340 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4341 } else { 4342 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4343 } 4344 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4345 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4346 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4347 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4348 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4349 4350 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4351 B_mpi->assembled = PETSC_FALSE; 4352 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4353 merge->bi = bi; 4354 merge->bj = bj; 4355 merge->buf_ri = buf_ri; 4356 merge->buf_rj = buf_rj; 4357 merge->coi = NULL; 4358 merge->coj = NULL; 4359 merge->owners_co = NULL; 4360 4361 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4362 4363 /* attach the supporting struct to B_mpi for reuse */ 4364 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4365 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4366 ierr = 
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4367 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4368 *mpimat = B_mpi; 4369 4370 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4371 PetscFunctionReturn(0); 4372 } 4373 4374 #undef __FUNCT__ 4375 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4376 /*@C 4377 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4378 matrices from each processor 4379 4380 Collective on MPI_Comm 4381 4382 Input Parameters: 4383 + comm - the communicators the parallel matrix will live on 4384 . seqmat - the input sequential matrices 4385 . m - number of local rows (or PETSC_DECIDE) 4386 . n - number of local columns (or PETSC_DECIDE) 4387 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4388 4389 Output Parameter: 4390 . mpimat - the parallel matrix generated 4391 4392 Level: advanced 4393 4394 Notes: 4395 The dimensions of the sequential matrix in each processor MUST be the same. 4396 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4397 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
4398 @*/ 4399 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4400 { 4401 PetscErrorCode ierr; 4402 PetscMPIInt size; 4403 4404 PetscFunctionBegin; 4405 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4406 if (size == 1) { 4407 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4408 if (scall == MAT_INITIAL_MATRIX) { 4409 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4410 } else { 4411 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4412 } 4413 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4414 PetscFunctionReturn(0); 4415 } 4416 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4417 if (scall == MAT_INITIAL_MATRIX) { 4418 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4419 } 4420 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4421 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4422 PetscFunctionReturn(0); 4423 } 4424 4425 #undef __FUNCT__ 4426 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4427 /*@ 4428 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4429 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4430 with MatGetSize() 4431 4432 Not Collective 4433 4434 Input Parameters: 4435 + A - the matrix 4436 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4437 4438 Output Parameter: 4439 . 
A_loc - the local sequential matrix generated 4440 4441 Level: developer 4442 4443 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4444 4445 @*/ 4446 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4447 { 4448 PetscErrorCode ierr; 4449 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4450 Mat_SeqAIJ *mat,*a,*b; 4451 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4452 MatScalar *aa,*ba,*cam; 4453 PetscScalar *ca; 4454 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4455 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4456 PetscBool match; 4457 MPI_Comm comm; 4458 PetscMPIInt size; 4459 4460 PetscFunctionBegin; 4461 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4462 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4463 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4464 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4465 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4466 4467 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4468 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4469 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4470 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4471 aa = a->a; ba = b->a; 4472 if (scall == MAT_INITIAL_MATRIX) { 4473 if (size == 1) { 4474 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4475 PetscFunctionReturn(0); 4476 } 4477 4478 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4479 ci[0] = 0; 4480 for (i=0; i<am; i++) { 4481 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4482 } 4483 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4484 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4485 k = 0; 4486 for (i=0; i<am; i++) { 4487 ncols_o = bi[i+1] - bi[i]; 4488 ncols_d = ai[i+1] - ai[i]; 4489 /* off-diagonal portion of A */ 4490 for (jo=0; jo<ncols_o; jo++) { 4491 col = cmap[*bj]; 4492 if (col >= cstart) break; 4493 cj[k] = col; bj++; 4494 
ca[k++] = *ba++; 4495 } 4496 /* diagonal portion of A */ 4497 for (j=0; j<ncols_d; j++) { 4498 cj[k] = cstart + *aj++; 4499 ca[k++] = *aa++; 4500 } 4501 /* off-diagonal portion of A */ 4502 for (j=jo; j<ncols_o; j++) { 4503 cj[k] = cmap[*bj++]; 4504 ca[k++] = *ba++; 4505 } 4506 } 4507 /* put together the new matrix */ 4508 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4509 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4510 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4511 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4512 mat->free_a = PETSC_TRUE; 4513 mat->free_ij = PETSC_TRUE; 4514 mat->nonew = 0; 4515 } else if (scall == MAT_REUSE_MATRIX) { 4516 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4517 ci = mat->i; cj = mat->j; cam = mat->a; 4518 for (i=0; i<am; i++) { 4519 /* off-diagonal portion of A */ 4520 ncols_o = bi[i+1] - bi[i]; 4521 for (jo=0; jo<ncols_o; jo++) { 4522 col = cmap[*bj]; 4523 if (col >= cstart) break; 4524 *cam++ = *ba++; bj++; 4525 } 4526 /* diagonal portion of A */ 4527 ncols_d = ai[i+1] - ai[i]; 4528 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4529 /* off-diagonal portion of A */ 4530 for (j=jo; j<ncols_o; j++) { 4531 *cam++ = *ba++; bj++; 4532 } 4533 } 4534 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4535 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4536 PetscFunctionReturn(0); 4537 } 4538 4539 #undef __FUNCT__ 4540 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4541 /*@C 4542 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4543 4544 Not Collective 4545 4546 Input Parameters: 4547 + A - the matrix 4548 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4549 - row, col - index sets of rows and columns to extract (or NULL) 4550 4551 Output Parameter: 4552 . 
A_loc - the local sequential matrix generated 4553 4554 Level: developer 4555 4556 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4557 4558 @*/ 4559 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4560 { 4561 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4562 PetscErrorCode ierr; 4563 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4564 IS isrowa,iscola; 4565 Mat *aloc; 4566 PetscBool match; 4567 4568 PetscFunctionBegin; 4569 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4570 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4571 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4572 if (!row) { 4573 start = A->rmap->rstart; end = A->rmap->rend; 4574 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4575 } else { 4576 isrowa = *row; 4577 } 4578 if (!col) { 4579 start = A->cmap->rstart; 4580 cmap = a->garray; 4581 nzA = a->A->cmap->n; 4582 nzB = a->B->cmap->n; 4583 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4584 ncols = 0; 4585 for (i=0; i<nzB; i++) { 4586 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4587 else break; 4588 } 4589 imark = i; 4590 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4591 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4592 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4593 } else { 4594 iscola = *col; 4595 } 4596 if (scall != MAT_INITIAL_MATRIX) { 4597 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4598 aloc[0] = *A_loc; 4599 } 4600 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4601 *A_loc = aloc[0]; 4602 ierr = PetscFree(aloc);CHKERRQ(ierr); 4603 if (!row) { 4604 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4605 } 4606 if (!col) { 4607 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4608 } 4609 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4610 
PetscFunctionReturn(0); 4611 } 4612 4613 #undef __FUNCT__ 4614 #define __FUNCT__ "MatGetBrowsOfAcols" 4615 /*@C 4616 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4617 4618 Collective on Mat 4619 4620 Input Parameters: 4621 + A,B - the matrices in mpiaij format 4622 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4623 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4624 4625 Output Parameter: 4626 + rowb, colb - index sets of rows and columns of B to extract 4627 - B_seq - the sequential matrix generated 4628 4629 Level: developer 4630 4631 @*/ 4632 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4633 { 4634 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4635 PetscErrorCode ierr; 4636 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4637 IS isrowb,iscolb; 4638 Mat *bseq=NULL; 4639 4640 PetscFunctionBegin; 4641 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4642 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4643 } 4644 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4645 4646 if (scall == MAT_INITIAL_MATRIX) { 4647 start = A->cmap->rstart; 4648 cmap = a->garray; 4649 nzA = a->A->cmap->n; 4650 nzB = a->B->cmap->n; 4651 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4652 ncols = 0; 4653 for (i=0; i<nzB; i++) { /* row < local row index */ 4654 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4655 else break; 4656 } 4657 imark = i; 4658 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4659 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4660 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4661 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4662 } else { 4663 if 
(!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4664 isrowb = *rowb; iscolb = *colb; 4665 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 4666 bseq[0] = *B_seq; 4667 } 4668 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4669 *B_seq = bseq[0]; 4670 ierr = PetscFree(bseq);CHKERRQ(ierr); 4671 if (!rowb) { 4672 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4673 } else { 4674 *rowb = isrowb; 4675 } 4676 if (!colb) { 4677 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4678 } else { 4679 *colb = iscolb; 4680 } 4681 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4682 PetscFunctionReturn(0); 4683 } 4684 4685 #undef __FUNCT__ 4686 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4687 /* 4688 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4689 of the OFF-DIAGONAL portion of local A 4690 4691 Collective on Mat 4692 4693 Input Parameters: 4694 + A,B - the matrices in mpiaij format 4695 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4696 4697 Output Parameter: 4698 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4699 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4700 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4701 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4702 4703 Level: developer 4704 4705 */ 4706 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4707 { 4708 VecScatter_MPI_General *gen_to,*gen_from; 4709 PetscErrorCode ierr; 4710 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4711 Mat_SeqAIJ *b_oth; 4712 VecScatter ctx =a->Mvctx; 4713 MPI_Comm comm; 4714 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4715 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4716 PetscScalar *rvalues,*svalues; 4717 MatScalar *b_otha,*bufa,*bufA; 4718 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4719 MPI_Request *rwaits = NULL,*swaits = NULL; 4720 MPI_Status *sstatus,rstatus; 4721 PetscMPIInt jj,size; 4722 PetscInt *cols,sbs,rbs; 4723 PetscScalar *vals; 4724 4725 PetscFunctionBegin; 4726 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4727 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4728 4729 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4730 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4731 } 4732 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4733 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4734 4735 gen_to = (VecScatter_MPI_General*)ctx->todata; 4736 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4737 rvalues = gen_from->values; /* holds the length of receiving row */ 4738 svalues = gen_to->values; /* holds the length of sending row */ 4739 nrecvs = gen_from->n; 4740 nsends = gen_to->n; 4741 4742 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4743 srow = gen_to->indices; /* local row 
index to be sent */ 4744 sstarts = gen_to->starts; 4745 sprocs = gen_to->procs; 4746 sstatus = gen_to->sstatus; 4747 sbs = gen_to->bs; 4748 rstarts = gen_from->starts; 4749 rprocs = gen_from->procs; 4750 rbs = gen_from->bs; 4751 4752 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4753 if (scall == MAT_INITIAL_MATRIX) { 4754 /* i-array */ 4755 /*---------*/ 4756 /* post receives */ 4757 for (i=0; i<nrecvs; i++) { 4758 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4759 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4760 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4761 } 4762 4763 /* pack the outgoing message */ 4764 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4765 4766 sstartsj[0] = 0; 4767 rstartsj[0] = 0; 4768 len = 0; /* total length of j or a array to be sent */ 4769 k = 0; 4770 for (i=0; i<nsends; i++) { 4771 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4772 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4773 for (j=0; j<nrows; j++) { 4774 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4775 for (l=0; l<sbs; l++) { 4776 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4777 4778 rowlen[j*sbs+l] = ncols; 4779 4780 len += ncols; 4781 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4782 } 4783 k++; 4784 } 4785 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4786 4787 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4788 } 4789 /* recvs and sends of i-array are completed */ 4790 i = nrecvs; 4791 while (i--) { 4792 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4793 } 4794 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4795 4796 /* allocate buffers for sending j and a arrays */ 4797 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 4798 ierr = 
PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 4799 4800 /* create i-array of B_oth */ 4801 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 4802 4803 b_othi[0] = 0; 4804 len = 0; /* total length of j or a array to be received */ 4805 k = 0; 4806 for (i=0; i<nrecvs; i++) { 4807 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4808 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4809 for (j=0; j<nrows; j++) { 4810 b_othi[k+1] = b_othi[k] + rowlen[j]; 4811 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 4812 k++; 4813 } 4814 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4815 } 4816 4817 /* allocate space for j and a arrrays of B_oth */ 4818 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 4819 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 4820 4821 /* j-array */ 4822 /*---------*/ 4823 /* post receives of j-array */ 4824 for (i=0; i<nrecvs; i++) { 4825 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4826 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4827 } 4828 4829 /* pack the outgoing message j-array */ 4830 k = 0; 4831 for (i=0; i<nsends; i++) { 4832 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4833 bufJ = bufj+sstartsj[i]; 4834 for (j=0; j<nrows; j++) { 4835 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4836 for (ll=0; ll<sbs; ll++) { 4837 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4838 for (l=0; l<ncols; l++) { 4839 *bufJ++ = cols[l]; 4840 } 4841 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4842 } 4843 } 4844 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4845 } 4846 4847 /* recvs and sends of j-array are completed */ 4848 i = nrecvs; 4849 while (i--) { 4850 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4851 } 4852 if (nsends) {ierr = 
MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4853 } else if (scall == MAT_REUSE_MATRIX) { 4854 sstartsj = *startsj_s; 4855 rstartsj = *startsj_r; 4856 bufa = *bufa_ptr; 4857 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4858 b_otha = b_oth->a; 4859 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4860 4861 /* a-array */ 4862 /*---------*/ 4863 /* post receives of a-array */ 4864 for (i=0; i<nrecvs; i++) { 4865 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4866 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4867 } 4868 4869 /* pack the outgoing message a-array */ 4870 k = 0; 4871 for (i=0; i<nsends; i++) { 4872 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4873 bufA = bufa+sstartsj[i]; 4874 for (j=0; j<nrows; j++) { 4875 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4876 for (ll=0; ll<sbs; ll++) { 4877 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4878 for (l=0; l<ncols; l++) { 4879 *bufA++ = vals[l]; 4880 } 4881 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4882 } 4883 } 4884 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4885 } 4886 /* recvs and sends of a-array are completed */ 4887 i = nrecvs; 4888 while (i--) { 4889 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4890 } 4891 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4892 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4893 4894 if (scall == MAT_INITIAL_MATRIX) { 4895 /* put together the new matrix */ 4896 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4897 4898 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4899 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4900 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4901 b_oth->free_a = PETSC_TRUE; 4902 b_oth->free_ij = PETSC_TRUE; 4903 b_oth->nonew = 0; 4904 4905 ierr = PetscFree(bufj);CHKERRQ(ierr); 4906 if (!startsj_s || !bufa_ptr) { 4907 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4908 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4909 } else { 4910 *startsj_s = sstartsj; 4911 *startsj_r = rstartsj; 4912 *bufa_ptr = bufa; 4913 } 4914 } 4915 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4916 PetscFunctionReturn(0); 4917 } 4918 4919 #undef __FUNCT__ 4920 #define __FUNCT__ "MatGetCommunicationStructs" 4921 /*@C 4922 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4923 4924 Not Collective 4925 4926 Input Parameters: 4927 . A - The matrix in mpiaij format 4928 4929 Output Parameter: 4930 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4931 . colmap - A map from global column index to local index into lvec 4932 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4933 4934 Level: developer 4935 4936 @*/ 4937 #if defined(PETSC_USE_CTABLE) 4938 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4939 #else 4940 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4941 #endif 4942 { 4943 Mat_MPIAIJ *a; 4944 4945 PetscFunctionBegin; 4946 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4947 PetscValidPointer(lvec, 2); 4948 PetscValidPointer(colmap, 3); 4949 PetscValidPointer(multScatter, 4); 4950 a = (Mat_MPIAIJ*) A->data; 4951 if (lvec) *lvec = a->lvec; 4952 if (colmap) *colmap = a->colmap; 4953 if (multScatter) *multScatter = a->Mvctx; 4954 PetscFunctionReturn(0); 4955 } 4956 4957 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4958 PETSC_INTERN PetscErrorCode 
MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4959 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4960 #if defined(PETSC_HAVE_ELEMENTAL) 4961 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4962 #endif 4963 4964 #undef __FUNCT__ 4965 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4966 /* 4967 Computes (B'*A')' since computing B*A directly is untenable 4968 4969 n p p 4970 ( ) ( ) ( ) 4971 m ( A ) * n ( B ) = m ( C ) 4972 ( ) ( ) ( ) 4973 4974 */ 4975 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4976 { 4977 PetscErrorCode ierr; 4978 Mat At,Bt,Ct; 4979 4980 PetscFunctionBegin; 4981 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4982 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4983 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4984 ierr = MatDestroy(&At);CHKERRQ(ierr); 4985 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4986 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4987 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4988 PetscFunctionReturn(0); 4989 } 4990 4991 #undef __FUNCT__ 4992 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4993 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4994 { 4995 PetscErrorCode ierr; 4996 PetscInt m=A->rmap->n,n=B->cmap->n; 4997 Mat Cmat; 4998 4999 PetscFunctionBegin; 5000 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5001 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5002 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5003 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5004 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5005 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5006 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5007 ierr = 
MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5008 5009 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5010 5011 *C = Cmat; 5012 PetscFunctionReturn(0); 5013 } 5014 5015 /* ----------------------------------------------------------------*/ 5016 #undef __FUNCT__ 5017 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5018 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5019 { 5020 PetscErrorCode ierr; 5021 5022 PetscFunctionBegin; 5023 if (scall == MAT_INITIAL_MATRIX) { 5024 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5025 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5026 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5027 } 5028 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5029 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5030 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5031 PetscFunctionReturn(0); 5032 } 5033 5034 /*MC 5035 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5036 5037 Options Database Keys: 5038 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5039 5040 Level: beginner 5041 5042 .seealso: MatCreateAIJ() 5043 M*/ 5044 5045 #undef __FUNCT__ 5046 #define __FUNCT__ "MatCreate_MPIAIJ" 5047 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5048 { 5049 Mat_MPIAIJ *b; 5050 PetscErrorCode ierr; 5051 PetscMPIInt size; 5052 5053 PetscFunctionBegin; 5054 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5055 5056 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5057 B->data = (void*)b; 5058 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5059 B->assembled = PETSC_FALSE; 5060 B->insertmode = NOT_SET_VALUES; 5061 b->size = size; 5062 5063 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5064 5065 /* build cache for off array entries formed */ 5066 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5067 5068 b->donotstash = PETSC_FALSE; 5069 b->colmap = 0; 5070 b->garray = 0; 5071 b->roworiented = PETSC_TRUE; 5072 5073 /* stuff used for matrix vector multiply */ 5074 b->lvec = NULL; 5075 b->Mvctx = NULL; 5076 5077 /* stuff for MatGetRow() */ 5078 b->rowindices = 0; 5079 b->rowvalues = 0; 5080 b->getrowactive = PETSC_FALSE; 5081 5082 /* flexible pointer used in CUSP/CUSPARSE classes */ 5083 b->spptr = NULL; 5084 5085 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5088 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5089 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5093 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5094 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5095 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5096 #if defined(PETSC_HAVE_ELEMENTAL) 5097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5098 #endif 5099 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5100 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5101 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5102 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5103 PetscFunctionReturn(0); 5104 } 5105 5106 #undef __FUNCT__ 5107 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5108 /*@ 5109 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5110 and "off-diagonal" part of the matrix in CSR format. 5111 5112 Collective on MPI_Comm 5113 5114 Input Parameters: 5115 + comm - MPI communicator 5116 . 
m - number of local rows (Cannot be PETSC_DECIDE) 5117 . n - This value should be the same as the local size used in creating the 5118 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5119 calculated if N is given) For square matrices n is almost always m. 5120 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5121 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5122 . i - row indices for "diagonal" portion of matrix 5123 . j - column indices 5124 . a - matrix values 5125 . oi - row indices for "off-diagonal" portion of matrix 5126 . oj - column indices 5127 - oa - matrix values 5128 5129 Output Parameter: 5130 . mat - the matrix 5131 5132 Level: advanced 5133 5134 Notes: 5135 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5136 must free the arrays once the matrix has been destroyed and not before. 5137 5138 The i and j indices are 0 based 5139 5140 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5141 5142 This sets local rows and cannot be used to set off-processor values. 5143 5144 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5145 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5146 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5147 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5148 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5149 communication if it is known that only local entries will be set. 
5150 5151 .keywords: matrix, aij, compressed row, sparse, parallel 5152 5153 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5154 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5155 @*/ 5156 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5157 { 5158 PetscErrorCode ierr; 5159 Mat_MPIAIJ *maij; 5160 5161 PetscFunctionBegin; 5162 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5163 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5164 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5165 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5166 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5167 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5168 maij = (Mat_MPIAIJ*) (*mat)->data; 5169 5170 (*mat)->preallocated = PETSC_TRUE; 5171 5172 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5173 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5174 5175 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5176 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5177 5178 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5179 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5180 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5181 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5182 5183 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5184 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5185 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5186 PetscFunctionReturn(0); 
5187 } 5188 5189 /* 5190 Special version for direct calls from Fortran 5191 */ 5192 #include <petsc/private/fortranimpl.h> 5193 5194 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5195 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5196 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5197 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5198 #endif 5199 5200 /* Change these macros so can be used in void function */ 5201 #undef CHKERRQ 5202 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5203 #undef SETERRQ2 5204 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5205 #undef SETERRQ3 5206 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5207 #undef SETERRQ 5208 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5209 5210 #undef __FUNCT__ 5211 #define __FUNCT__ "matsetvaluesmpiaij_" 5212 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5213 { 5214 Mat mat = *mmat; 5215 PetscInt m = *mm, n = *mn; 5216 InsertMode addv = *maddv; 5217 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5218 PetscScalar value; 5219 PetscErrorCode ierr; 5220 5221 MatCheckPreallocated(mat,1); 5222 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5223 5224 #if defined(PETSC_USE_DEBUG) 5225 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5226 #endif 5227 { 5228 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5229 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5230 PetscBool roworiented = aij->roworiented; 5231 5232 /* Some Variables required in the macro */ 5233 Mat A = aij->A; 5234 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5235 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5236 MatScalar *aa = a->a; 5237 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? 
PETSC_TRUE : PETSC_FALSE); 5238 Mat B = aij->B; 5239 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5240 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5241 MatScalar *ba = b->a; 5242 5243 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5244 PetscInt nonew = a->nonew; 5245 MatScalar *ap1,*ap2; 5246 5247 PetscFunctionBegin; 5248 for (i=0; i<m; i++) { 5249 if (im[i] < 0) continue; 5250 #if defined(PETSC_USE_DEBUG) 5251 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5252 #endif 5253 if (im[i] >= rstart && im[i] < rend) { 5254 row = im[i] - rstart; 5255 lastcol1 = -1; 5256 rp1 = aj + ai[row]; 5257 ap1 = aa + ai[row]; 5258 rmax1 = aimax[row]; 5259 nrow1 = ailen[row]; 5260 low1 = 0; 5261 high1 = nrow1; 5262 lastcol2 = -1; 5263 rp2 = bj + bi[row]; 5264 ap2 = ba + bi[row]; 5265 rmax2 = bimax[row]; 5266 nrow2 = bilen[row]; 5267 low2 = 0; 5268 high2 = nrow2; 5269 5270 for (j=0; j<n; j++) { 5271 if (roworiented) value = v[i*n+j]; 5272 else value = v[i+j*m]; 5273 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5274 if (in[j] >= cstart && in[j] < cend) { 5275 col = in[j] - cstart; 5276 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5277 } else if (in[j] < 0) continue; 5278 #if defined(PETSC_USE_DEBUG) 5279 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5280 #endif 5281 else { 5282 if (mat->was_assembled) { 5283 if (!aij->colmap) { 5284 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5285 } 5286 #if defined(PETSC_USE_CTABLE) 5287 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5288 col--; 5289 #else 5290 col = aij->colmap[in[j]] - 1; 5291 #endif 5292 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5293 ierr = 
MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5294 col = in[j]; 5295 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5296 B = aij->B; 5297 b = (Mat_SeqAIJ*)B->data; 5298 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5299 rp2 = bj + bi[row]; 5300 ap2 = ba + bi[row]; 5301 rmax2 = bimax[row]; 5302 nrow2 = bilen[row]; 5303 low2 = 0; 5304 high2 = nrow2; 5305 bm = aij->B->rmap->n; 5306 ba = b->a; 5307 } 5308 } else col = in[j]; 5309 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5310 } 5311 } 5312 } else if (!aij->donotstash) { 5313 if (roworiented) { 5314 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5315 } else { 5316 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5317 } 5318 } 5319 } 5320 } 5321 PetscFunctionReturnVoid(); 5322 } 5323 5324