1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/isimpl.h> 6 #include <petscblaslapack.h> 7 #include <petscsf.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 22 enough exist. 23 24 Level: beginner 25 26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 27 M*/ 28 29 /*MC 30 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 31 32 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 33 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 34 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 35 for communicators controlling multiple processes. It is recommended that you call both of 36 the above preallocation routines for simplicity. 37 38 Options Database Keys: 39 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 40 41 Level: beginner 42 43 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 44 M*/ 45 46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 51 PetscFunctionBegin; 52 if (mat->A) { 53 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 54 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 55 } 56 PetscFunctionReturn(0); 57 } 58 59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 60 { 61 PetscErrorCode ierr; 62 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 63 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 64 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 65 const PetscInt *ia,*ib; 66 const MatScalar *aa,*bb; 67 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 68 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 69 70 PetscFunctionBegin; 71 *keptrows = 0; 72 ia = a->i; 73 ib = b->i; 74 for (i=0; i<m; i++) { 75 na = ia[i+1] - ia[i]; 76 nb = ib[i+1] - ib[i]; 77 if (!na && !nb) { 78 cnt++; 79 goto ok1; 80 } 81 aa = a->a + ia[i]; 82 for (j=0; j<na; j++) { 83 if (aa[j] != 0.0) goto ok1; 84 } 85 bb = b->a + ib[i]; 86 for (j=0; j <nb; j++) { 87 if (bb[j] != 0.0) goto ok1; 88 } 89 cnt++; 90 ok1:; 91 } 92 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 93 if (!n0rows) PetscFunctionReturn(0); 94 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 95 cnt = 0; 96 for (i=0; i<m; i++) { 97 na = ia[i+1] - ia[i]; 98 nb = ib[i+1] - ib[i]; 99 if (!na && !nb) continue; 100 aa = a->a + ia[i]; 101 for (j=0; j<na;j++) { 102 if (aa[j] != 0.0) { 103 rows[cnt++] = rstart + i; 104 goto ok2; 105 } 106 } 107 bb = b->a + ib[i]; 108 for (j=0; j<nb; j++) { 109 if (bb[j] != 0.0) { 110 rows[cnt++] = rstart + i; 111 goto ok2; 112 } 113 } 114 ok2:; 115 } 116 ierr = 
ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 117 PetscFunctionReturn(0); 118 } 119 120 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 121 { 122 PetscErrorCode ierr; 123 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 124 125 PetscFunctionBegin; 126 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 127 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 128 } else { 129 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 130 } 131 PetscFunctionReturn(0); 132 } 133 134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 135 { 136 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 137 PetscErrorCode ierr; 138 PetscInt i,rstart,nrows,*rows; 139 140 PetscFunctionBegin; 141 *zrows = NULL; 142 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 143 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 144 for (i=0; i<nrows; i++) rows[i] += rstart; 145 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 150 { 151 PetscErrorCode ierr; 152 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 153 PetscInt i,n,*garray = aij->garray; 154 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 155 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 156 PetscReal *work; 157 158 PetscFunctionBegin; 159 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 160 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 161 if (type == NORM_2) { 162 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 163 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 164 } 165 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 166 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 167 } 168 } else if (type == NORM_1) { 169 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 170 work[A->cmap->rstart + 
a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 171 } 172 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 173 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 174 } 175 } else if (type == NORM_INFINITY) { 176 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 177 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 178 } 179 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 180 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 181 } 182 183 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 184 if (type == NORM_INFINITY) { 185 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 186 } else { 187 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 188 } 189 ierr = PetscFree(work);CHKERRQ(ierr); 190 if (type == NORM_2) { 191 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 192 } 193 PetscFunctionReturn(0); 194 } 195 196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 197 { 198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 199 IS sis,gis; 200 PetscErrorCode ierr; 201 const PetscInt *isis,*igis; 202 PetscInt n,*iis,nsis,ngis,rstart,i; 203 204 PetscFunctionBegin; 205 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 206 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 207 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 208 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 209 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 210 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 211 212 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 213 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 214 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 215 n = ngis + nsis; 216 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 217 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 218 for 
(i=0; i<n; i++) iis[i] += rstart; 219 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 220 221 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 222 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 223 ierr = ISDestroy(&sis);CHKERRQ(ierr); 224 ierr = ISDestroy(&gis);CHKERRQ(ierr); 225 PetscFunctionReturn(0); 226 } 227 228 /* 229 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 230 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 231 232 Only for square matrices 233 234 Used by a preconditioner, hence PETSC_EXTERN 235 */ 236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 237 { 238 PetscMPIInt rank,size; 239 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 240 PetscErrorCode ierr; 241 Mat mat; 242 Mat_SeqAIJ *gmata; 243 PetscMPIInt tag; 244 MPI_Status status; 245 PetscBool aij; 246 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 247 248 PetscFunctionBegin; 249 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 250 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 251 if (!rank) { 252 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 253 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 254 } 255 if (reuse == MAT_INITIAL_MATRIX) { 256 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 257 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 258 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 259 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 260 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 261 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 262 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 263 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 264 ierr = 
MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 265 266 rowners[0] = 0; 267 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 268 rstart = rowners[rank]; 269 rend = rowners[rank+1]; 270 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 271 if (!rank) { 272 gmata = (Mat_SeqAIJ*) gmat->data; 273 /* send row lengths to all processors */ 274 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 275 for (i=1; i<size; i++) { 276 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 277 } 278 /* determine number diagonal and off-diagonal counts */ 279 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 280 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 281 jj = 0; 282 for (i=0; i<m; i++) { 283 for (j=0; j<dlens[i]; j++) { 284 if (gmata->j[jj] < rstart) ld[i]++; 285 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 286 jj++; 287 } 288 } 289 /* send column indices to other processes */ 290 for (i=1; i<size; i++) { 291 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 292 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 294 } 295 296 /* send numerical values to other processes */ 297 for (i=1; i<size; i++) { 298 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 299 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 300 } 301 gmataa = gmata->a; 302 gmataj = gmata->j; 303 304 } else { 305 /* receive row lengths */ 306 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 307 /* receive column indices */ 308 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 309 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 310 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 311 /* determine number diagonal and off-diagonal counts */ 312 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 313 
ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 314 jj = 0; 315 for (i=0; i<m; i++) { 316 for (j=0; j<dlens[i]; j++) { 317 if (gmataj[jj] < rstart) ld[i]++; 318 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 319 jj++; 320 } 321 } 322 /* receive numerical values */ 323 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 324 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 325 } 326 /* set preallocation */ 327 for (i=0; i<m; i++) { 328 dlens[i] -= olens[i]; 329 } 330 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 331 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 332 333 for (i=0; i<m; i++) { 334 dlens[i] += olens[i]; 335 } 336 cnt = 0; 337 for (i=0; i<m; i++) { 338 row = rstart + i; 339 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 340 cnt += dlens[i]; 341 } 342 if (rank) { 343 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 344 } 345 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 346 ierr = PetscFree(rowners);CHKERRQ(ierr); 347 348 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 349 350 *inmat = mat; 351 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 352 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 353 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 354 mat = *inmat; 355 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 356 if (!rank) { 357 /* send numerical values to other processes */ 358 gmata = (Mat_SeqAIJ*) gmat->data; 359 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 360 gmataa = gmata->a; 361 for (i=1; i<size; i++) { 362 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 363 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 364 } 365 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 366 } else { 367 /* receive numerical values from process 0*/ 368 nz = Ad->nz + 
Ao->nz; 369 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 370 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 371 } 372 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 373 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 374 ad = Ad->a; 375 ao = Ao->a; 376 if (mat->rmap->n) { 377 i = 0; 378 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 379 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 380 } 381 for (i=1; i<mat->rmap->n; i++) { 382 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 383 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 384 } 385 i--; 386 if (mat->rmap->n) { 387 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 388 } 389 if (rank) { 390 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 391 } 392 } 393 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 395 PetscFunctionReturn(0); 396 } 397 398 /* 399 Local utility routine that creates a mapping from the global column 400 number to the local number in the off-diagonal part of the local 401 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 402 a slightly higher hash table cost; without it it is not scalable (each processor 403 has an order N integer array but is fast to acess. 
404 */ 405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 406 { 407 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 408 PetscErrorCode ierr; 409 PetscInt n = aij->B->cmap->n,i; 410 411 PetscFunctionBegin; 412 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 413 #if defined(PETSC_USE_CTABLE) 414 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 415 for (i=0; i<n; i++) { 416 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 417 } 418 #else 419 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 420 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 421 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 422 #endif 423 PetscFunctionReturn(0); 424 } 425 426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 427 { \ 428 if (col <= lastcol1) low1 = 0; \ 429 else high1 = nrow1; \ 430 lastcol1 = col;\ 431 while (high1-low1 > 5) { \ 432 t = (low1+high1)/2; \ 433 if (rp1[t] > col) high1 = t; \ 434 else low1 = t; \ 435 } \ 436 for (_i=low1; _i<high1; _i++) { \ 437 if (rp1[_i] > col) break; \ 438 if (rp1[_i] == col) { \ 439 if (addv == ADD_VALUES) ap1[_i] += value; \ 440 else ap1[_i] = value; \ 441 goto a_noinsert; \ 442 } \ 443 } \ 444 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 445 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 446 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 447 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 448 N = nrow1++ - 1; a->nz++; high1++; \ 449 /* shift up all the later entries in this row */ \ 450 for (ii=N; ii>=_i; ii--) { \ 451 rp1[ii+1] = rp1[ii]; \ 452 ap1[ii+1] = ap1[ii]; \ 453 } \ 454 rp1[_i] = col; \ 455 ap1[_i] = value; \ 456 
A->nonzerostate++;\ 457 a_noinsert: ; \ 458 ailen[row] = nrow1; \ 459 } 460 461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 462 { \ 463 if (col <= lastcol2) low2 = 0; \ 464 else high2 = nrow2; \ 465 lastcol2 = col; \ 466 while (high2-low2 > 5) { \ 467 t = (low2+high2)/2; \ 468 if (rp2[t] > col) high2 = t; \ 469 else low2 = t; \ 470 } \ 471 for (_i=low2; _i<high2; _i++) { \ 472 if (rp2[_i] > col) break; \ 473 if (rp2[_i] == col) { \ 474 if (addv == ADD_VALUES) ap2[_i] += value; \ 475 else ap2[_i] = value; \ 476 goto b_noinsert; \ 477 } \ 478 } \ 479 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 480 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 481 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 482 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 483 N = nrow2++ - 1; b->nz++; high2++; \ 484 /* shift up all the later entries in this row */ \ 485 for (ii=N; ii>=_i; ii--) { \ 486 rp2[ii+1] = rp2[ii]; \ 487 ap2[ii+1] = ap2[ii]; \ 488 } \ 489 rp2[_i] = col; \ 490 ap2[_i] = value; \ 491 B->nonzerostate++; \ 492 b_noinsert: ; \ 493 bilen[row] = nrow2; \ 494 } 495 496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 497 { 498 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 499 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 500 PetscErrorCode ierr; 501 PetscInt l,*garray = mat->garray,diag; 502 503 PetscFunctionBegin; 504 /* code only works for square matrices A */ 505 506 /* find size of row to the left of the diagonal part */ 507 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 508 row = row - diag; 509 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 510 if (garray[b->j[b->i[row]+l]] > diag) break; 511 } 512 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 513 514 /* diagonal 
part */ 515 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 516 517 /* right of diagonal part */ 518 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 523 { 524 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 525 PetscScalar value; 526 PetscErrorCode ierr; 527 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 528 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 529 PetscBool roworiented = aij->roworiented; 530 531 /* Some Variables required in the macro */ 532 Mat A = aij->A; 533 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 534 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 535 MatScalar *aa = a->a; 536 PetscBool ignorezeroentries = a->ignorezeroentries; 537 Mat B = aij->B; 538 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 539 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 540 MatScalar *ba = b->a; 541 542 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 543 PetscInt nonew; 544 MatScalar *ap1,*ap2; 545 546 PetscFunctionBegin; 547 for (i=0; i<m; i++) { 548 if (im[i] < 0) continue; 549 #if defined(PETSC_USE_DEBUG) 550 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 551 #endif 552 if (im[i] >= rstart && im[i] < rend) { 553 row = im[i] - rstart; 554 lastcol1 = -1; 555 rp1 = aj + ai[row]; 556 ap1 = aa + ai[row]; 557 rmax1 = aimax[row]; 558 nrow1 = ailen[row]; 559 low1 = 0; 560 high1 = nrow1; 561 lastcol2 = -1; 562 rp2 = bj + bi[row]; 563 ap2 = ba + bi[row]; 564 rmax2 = bimax[row]; 565 nrow2 = bilen[row]; 566 low2 = 0; 567 high2 = nrow2; 568 569 for 
(j=0; j<n; j++) { 570 if (roworiented) value = v[i*n+j]; 571 else value = v[i+j*m]; 572 if (in[j] >= cstart && in[j] < cend) { 573 col = in[j] - cstart; 574 nonew = a->nonew; 575 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 576 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 577 } else if (in[j] < 0) continue; 578 #if defined(PETSC_USE_DEBUG) 579 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 580 #endif 581 else { 582 if (mat->was_assembled) { 583 if (!aij->colmap) { 584 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 585 } 586 #if defined(PETSC_USE_CTABLE) 587 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 588 col--; 589 #else 590 col = aij->colmap[in[j]] - 1; 591 #endif 592 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 593 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 594 col = in[j]; 595 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 596 B = aij->B; 597 b = (Mat_SeqAIJ*)B->data; 598 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 599 rp2 = bj + bi[row]; 600 ap2 = ba + bi[row]; 601 rmax2 = bimax[row]; 602 nrow2 = bilen[row]; 603 low2 = 0; 604 high2 = nrow2; 605 bm = aij->B->rmap->n; 606 ba = b->a; 607 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 608 } else col = in[j]; 609 nonew = b->nonew; 610 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 611 } 612 } 613 } else { 614 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 615 if (!aij->donotstash) { 616 mat->assembled = PETSC_FALSE; 617 if (roworiented) { 618 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 619 } else { 620 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 621 } 622 } 623 } 624 } 625 PetscFunctionReturn(0); 626 } 627 628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 633 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 634 635 PetscFunctionBegin; 636 for (i=0; i<m; i++) { 637 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 638 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 639 if (idxm[i] >= rstart && idxm[i] < rend) { 640 row = idxm[i] - rstart; 641 for (j=0; j<n; j++) { 642 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 643 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 644 if (idxn[j] >= cstart && idxn[j] < cend) { 645 col = idxn[j] - cstart; 646 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 647 } else { 648 if (!aij->colmap) { 649 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 650 } 651 #if defined(PETSC_USE_CTABLE) 652 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 653 col--; 654 #else 655 col = aij->colmap[idxn[j]] - 1; 656 #endif 657 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 658 else { 659 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 660 } 661 } 662 } 663 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only 
local values currently supported"); 664 } 665 PetscFunctionReturn(0); 666 } 667 668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 669 670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 671 { 672 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 673 PetscErrorCode ierr; 674 PetscInt nstash,reallocs; 675 676 PetscFunctionBegin; 677 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 678 679 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 680 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 681 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 682 PetscFunctionReturn(0); 683 } 684 685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 686 { 687 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 688 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 689 PetscErrorCode ierr; 690 PetscMPIInt n; 691 PetscInt i,j,rstart,ncols,flg; 692 PetscInt *row,*col; 693 PetscBool other_disassembled; 694 PetscScalar *val; 695 696 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 697 698 PetscFunctionBegin; 699 if (!aij->donotstash && !mat->nooffprocentries) { 700 while (1) { 701 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 702 if (!flg) break; 703 704 for (i=0; i<n; ) { 705 /* Now identify the consecutive vals belonging to the same row */ 706 for (j=i,rstart=row[j]; j<n; j++) { 707 if (row[j] != rstart) break; 708 } 709 if (j < n) ncols = j-i; 710 else ncols = n-i; 711 /* Now assemble all these values with a single function call */ 712 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 713 714 i = j; 715 } 716 } 717 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 718 } 719 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 720 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 721 722 /* determine 
if any processor has disassembled, if so we must 723 also disassemble ourselfs, in order that we may reassemble. */ 724 /* 725 if nonzero structure of submatrix B cannot change then we know that 726 no processor disassembled thus we can skip this stuff 727 */ 728 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 729 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 730 if (mat->was_assembled && !other_disassembled) { 731 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 732 } 733 } 734 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 735 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 736 } 737 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 738 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 739 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 740 741 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 742 743 aij->rowvalues = 0; 744 745 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 746 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 747 748 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 749 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 750 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 751 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 752 } 753 PetscFunctionReturn(0); 754 } 755 756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 757 { 758 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 759 PetscErrorCode ierr; 760 761 PetscFunctionBegin; 762 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 763 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 764 PetscFunctionReturn(0); 765 } 766 767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 768 { 769 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) 
A->data; 770 PetscInt *lrows; 771 PetscInt r, len; 772 PetscErrorCode ierr; 773 774 PetscFunctionBegin; 775 /* get locally owned rows */ 776 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 777 /* fix right hand side if needed */ 778 if (x && b) { 779 const PetscScalar *xx; 780 PetscScalar *bb; 781 782 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 783 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 784 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 785 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 786 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 787 } 788 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 789 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 790 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 791 PetscBool cong; 792 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 793 if (cong) A->congruentlayouts = 1; 794 else A->congruentlayouts = 0; 795 } 796 if ((diag != 0.0) && A->congruentlayouts) { 797 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 798 } else if (diag != 0.0) { 799 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 800 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 801 for (r = 0; r < len; ++r) { 802 const PetscInt row = lrows[r] + A->rmap->rstart; 803 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 804 } 805 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 806 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 807 } else { 808 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 809 } 810 ierr = PetscFree(lrows);CHKERRQ(ierr); 811 812 /* only change matrix nonzero state if pattern was allowed 
to be changed */ 813 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 814 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 815 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 816 } 817 PetscFunctionReturn(0); 818 } 819 820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 821 { 822 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 823 PetscErrorCode ierr; 824 PetscMPIInt n = A->rmap->n; 825 PetscInt i,j,r,m,p = 0,len = 0; 826 PetscInt *lrows,*owners = A->rmap->range; 827 PetscSFNode *rrows; 828 PetscSF sf; 829 const PetscScalar *xx; 830 PetscScalar *bb,*mask; 831 Vec xmask,lmask; 832 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 833 const PetscInt *aj, *ii,*ridx; 834 PetscScalar *aa; 835 836 PetscFunctionBegin; 837 /* Create SF where leaves are input rows and roots are owned rows */ 838 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 839 for (r = 0; r < n; ++r) lrows[r] = -1; 840 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 841 for (r = 0; r < N; ++r) { 842 const PetscInt idx = rows[r]; 843 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 844 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 845 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 846 } 847 rrows[r].rank = p; 848 rrows[r].index = rows[r] - owners[p]; 849 } 850 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 851 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 852 /* Collect flags for rows to be zeroed */ 853 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 854 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 855 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 856 /* 
Compress and put in row numbers */ 857 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 858 /* zero diagonal part of matrix */ 859 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 860 /* handle off diagonal part of matrix */ 861 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 862 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 863 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 864 for (i=0; i<len; i++) bb[lrows[i]] = 1; 865 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 866 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 867 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 869 if (x) { 870 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 873 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 874 } 875 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 876 /* remove zeroed rows of off diagonal matrix */ 877 ii = aij->i; 878 for (i=0; i<len; i++) { 879 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 880 } 881 /* loop over all elements of off process part of matrix zeroing removed columns*/ 882 if (aij->compressedrow.use) { 883 m = aij->compressedrow.nrows; 884 ii = aij->compressedrow.i; 885 ridx = aij->compressedrow.rindex; 886 for (i=0; i<m; i++) { 887 n = ii[i+1] - ii[i]; 888 aj = aij->j + ii[i]; 889 aa = aij->a + ii[i]; 890 891 for (j=0; j<n; j++) { 892 if (PetscAbsScalar(mask[*aj])) { 893 if (b) bb[*ridx] -= *aa*xx[*aj]; 894 *aa = 0.0; 895 } 896 aa++; 897 aj++; 898 } 899 ridx++; 900 } 901 } else { /* do not use compressed row format */ 902 m = l->B->rmap->n; 903 for (i=0; i<m; i++) { 904 n = ii[i+1] - ii[i]; 905 aj = aij->j + ii[i]; 906 aa = aij->a + ii[i]; 907 for (j=0; j<n; j++) { 
908 if (PetscAbsScalar(mask[*aj])) { 909 if (b) bb[i] -= *aa*xx[*aj]; 910 *aa = 0.0; 911 } 912 aa++; 913 aj++; 914 } 915 } 916 } 917 if (x) { 918 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 919 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 920 } 921 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 922 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 923 ierr = PetscFree(lrows);CHKERRQ(ierr); 924 925 /* only change matrix nonzero state if pattern was allowed to be changed */ 926 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 927 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 928 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 929 } 930 PetscFunctionReturn(0); 931 } 932 933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 934 { 935 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 936 PetscErrorCode ierr; 937 PetscInt nt; 938 VecScatter Mvctx = a->Mvctx; 939 940 PetscFunctionBegin; 941 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 942 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 943 944 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 945 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 946 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 947 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 948 PetscFunctionReturn(0); 949 } 950 951 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 952 { 953 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 954 PetscErrorCode ierr; 955 956 PetscFunctionBegin; 957 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 958 PetscFunctionReturn(0); 959 } 960 961 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 962 { 963 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 964 PetscErrorCode ierr; 965 VecScatter Mvctx = a->Mvctx; 966 967 PetscFunctionBegin; 968 if 
(a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 969 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 970 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 971 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 972 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 973 PetscFunctionReturn(0); 974 } 975 976 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 977 { 978 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 979 PetscErrorCode ierr; 980 PetscBool merged; 981 982 PetscFunctionBegin; 983 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 984 /* do nondiagonal part */ 985 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 986 if (!merged) { 987 /* send it on its way */ 988 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 989 /* do local part */ 990 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 991 /* receive remote parts: note this assumes the values are not actually */ 992 /* added in yy until the next line, */ 993 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 994 } else { 995 /* do local part */ 996 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 997 /* send it on its way */ 998 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 999 /* values actually were received in the Begin() but we need to call this nop */ 1000 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1001 } 1002 PetscFunctionReturn(0); 1003 } 1004 1005 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1006 { 1007 MPI_Comm comm; 1008 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1009 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1010 IS Me,Notme; 1011 PetscErrorCode ierr; 1012 PetscInt M,N,first,last,*notme,i; 1013 PetscMPIInt size; 1014 1015 PetscFunctionBegin; 1016 /* Easy 
test: symmetric diagonal block */ 1017 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1018 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1019 if (!*f) PetscFunctionReturn(0); 1020 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1021 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1022 if (size == 1) PetscFunctionReturn(0); 1023 1024 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1025 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1026 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1027 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1028 for (i=0; i<first; i++) notme[i] = i; 1029 for (i=last; i<M; i++) notme[i-last+first] = i; 1030 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1031 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1032 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1033 Aoff = Aoffs[0]; 1034 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1035 Boff = Boffs[0]; 1036 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1037 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1038 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1039 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1040 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1041 ierr = PetscFree(notme);CHKERRQ(ierr); 1042 PetscFunctionReturn(0); 1043 } 1044 1045 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1046 { 1047 PetscErrorCode ierr; 1048 1049 PetscFunctionBegin; 1050 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1051 PetscFunctionReturn(0); 1052 } 1053 1054 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1055 { 1056 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1057 PetscErrorCode ierr; 1058 1059 PetscFunctionBegin; 1060 /* do nondiagonal part */ 1061 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1062 /* send it on 
its way */ 1063 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1064 /* do local part */ 1065 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1066 /* receive remote parts */ 1067 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1068 PetscFunctionReturn(0); 1069 } 1070 1071 /* 1072 This only works correctly for square matrices where the subblock A->A is the 1073 diagonal block 1074 */ 1075 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1076 { 1077 PetscErrorCode ierr; 1078 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1079 1080 PetscFunctionBegin; 1081 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1082 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1083 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1088 { 1089 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1090 PetscErrorCode ierr; 1091 1092 PetscFunctionBegin; 1093 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1094 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1095 PetscFunctionReturn(0); 1096 } 1097 1098 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1099 { 1100 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 #if defined(PETSC_USE_LOG) 1105 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1106 #endif 1107 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1108 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1109 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1110 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1111 #if defined(PETSC_USE_CTABLE) 1112 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1113 #else 1114 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1115 
#endif 1116 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1117 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1118 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1119 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1120 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1121 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1122 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1123 1124 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1125 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1126 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1127 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1128 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1129 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1130 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1131 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1132 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1133 #if defined(PETSC_HAVE_ELEMENTAL) 1134 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1135 #endif 1136 #if defined(PETSC_HAVE_HYPRE) 1137 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1138 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1139 #endif 1140 PetscFunctionReturn(0); 1141 } 1142 1143 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1144 { 1145 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1146 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1147 Mat_SeqAIJ *B = 
(Mat_SeqAIJ*)aij->B->data; 1148 PetscErrorCode ierr; 1149 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1150 int fd; 1151 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1152 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1153 PetscScalar *column_values; 1154 PetscInt message_count,flowcontrolcount; 1155 FILE *file; 1156 1157 PetscFunctionBegin; 1158 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1159 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1160 nz = A->nz + B->nz; 1161 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1162 if (!rank) { 1163 header[0] = MAT_FILE_CLASSID; 1164 header[1] = mat->rmap->N; 1165 header[2] = mat->cmap->N; 1166 1167 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1168 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1169 /* get largest number of rows any processor has */ 1170 rlen = mat->rmap->n; 1171 range = mat->rmap->range; 1172 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1173 } else { 1174 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1175 rlen = mat->rmap->n; 1176 } 1177 1178 /* load up the local row counts */ 1179 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1180 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1181 1182 /* store the row lengths to the file */ 1183 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1184 if (!rank) { 1185 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1186 for (i=1; i<size; i++) { 1187 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1188 rlen = range[i+1] - range[i]; 1189 ierr = 
MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1190 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1191 } 1192 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1193 } else { 1194 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1195 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1196 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1197 } 1198 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1199 1200 /* load up the local column indices */ 1201 nzmax = nz; /* th processor needs space a largest processor needs */ 1202 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1203 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1204 cnt = 0; 1205 for (i=0; i<mat->rmap->n; i++) { 1206 for (j=B->i[i]; j<B->i[i+1]; j++) { 1207 if ((col = garray[B->j[j]]) > cstart) break; 1208 column_indices[cnt++] = col; 1209 } 1210 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1211 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1212 } 1213 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1214 1215 /* store the column indices to the file */ 1216 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1217 if (!rank) { 1218 MPI_Status status; 1219 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1220 for (i=1; i<size; i++) { 1221 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1222 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1223 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D 
nzmax = %D",nz,nzmax); 1224 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1225 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1226 } 1227 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1228 } else { 1229 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1230 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1231 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1232 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1233 } 1234 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1235 1236 /* load up the local column values */ 1237 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1238 cnt = 0; 1239 for (i=0; i<mat->rmap->n; i++) { 1240 for (j=B->i[i]; j<B->i[i+1]; j++) { 1241 if (garray[B->j[j]] > cstart) break; 1242 column_values[cnt++] = B->a[j]; 1243 } 1244 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1245 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1246 } 1247 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1248 1249 /* store the column values to the file */ 1250 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1251 if (!rank) { 1252 MPI_Status status; 1253 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1254 for (i=1; i<size; i++) { 1255 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1256 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1257 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1258 ierr = 
MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1259 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1260 } 1261 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1262 } else { 1263 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1264 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1265 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1266 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1267 } 1268 ierr = PetscFree(column_values);CHKERRQ(ierr); 1269 1270 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1271 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1272 PetscFunctionReturn(0); 1273 } 1274 1275 #include <petscdraw.h> 1276 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1277 { 1278 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1279 PetscErrorCode ierr; 1280 PetscMPIInt rank = aij->rank,size = aij->size; 1281 PetscBool isdraw,iascii,isbinary; 1282 PetscViewer sviewer; 1283 PetscViewerFormat format; 1284 1285 PetscFunctionBegin; 1286 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1287 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1288 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1289 if (iascii) { 1290 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1291 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1292 MatInfo info; 1293 PetscBool inodes; 1294 1295 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1296 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1297 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 
1298 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1299 if (!inodes) { 1300 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1301 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1302 } else { 1303 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1304 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1305 } 1306 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1307 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1308 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1309 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1310 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1311 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1312 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1313 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1316 PetscInt inodecount,inodelimit,*inodes; 1317 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1318 if (inodes) { 1319 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1320 } else { 1321 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1322 } 1323 PetscFunctionReturn(0); 1324 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1325 PetscFunctionReturn(0); 1326 } 1327 } else if (isbinary) { 1328 if (size == 1) { 1329 ierr = 
PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1330 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1331 } else { 1332 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1333 } 1334 PetscFunctionReturn(0); 1335 } else if (isdraw) { 1336 PetscDraw draw; 1337 PetscBool isnull; 1338 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1339 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1340 if (isnull) PetscFunctionReturn(0); 1341 } 1342 1343 { 1344 /* assemble the entire matrix onto first processor. */ 1345 Mat A; 1346 Mat_SeqAIJ *Aloc; 1347 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1348 MatScalar *a; 1349 1350 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1351 if (!rank) { 1352 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1353 } else { 1354 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1355 } 1356 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1357 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1358 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1359 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1360 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1361 1362 /* copy over the A part */ 1363 Aloc = (Mat_SeqAIJ*)aij->A->data; 1364 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1365 row = mat->rmap->rstart; 1366 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1367 for (i=0; i<m; i++) { 1368 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1369 row++; 1370 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1371 } 1372 aj = Aloc->j; 1373 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1374 1375 /* copy over the B part */ 1376 Aloc = (Mat_SeqAIJ*)aij->B->data; 1377 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1378 row = mat->rmap->rstart; 1379 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1380 ct = 
cols; 1381 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1382 for (i=0; i<m; i++) { 1383 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1384 row++; 1385 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1386 } 1387 ierr = PetscFree(ct);CHKERRQ(ierr); 1388 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1389 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1390 /* 1391 Everyone has to call to draw the matrix since the graphics waits are 1392 synchronized across all processors that share the PetscDraw object 1393 */ 1394 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1395 if (!rank) { 1396 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1397 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1398 } 1399 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1400 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1401 ierr = MatDestroy(&A);CHKERRQ(ierr); 1402 } 1403 PetscFunctionReturn(0); 1404 } 1405 1406 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1407 { 1408 PetscErrorCode ierr; 1409 PetscBool iascii,isdraw,issocket,isbinary; 1410 1411 PetscFunctionBegin; 1412 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1413 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1414 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1415 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1416 if (iascii || isdraw || isbinary || issocket) { 1417 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1418 } 1419 PetscFunctionReturn(0); 1420 } 1421 1422 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1423 { 1424 
Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1425 PetscErrorCode ierr; 1426 Vec bb1 = 0; 1427 PetscBool hasop; 1428 1429 PetscFunctionBegin; 1430 if (flag == SOR_APPLY_UPPER) { 1431 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1432 PetscFunctionReturn(0); 1433 } 1434 1435 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1436 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1437 } 1438 1439 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1440 if (flag & SOR_ZERO_INITIAL_GUESS) { 1441 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1442 its--; 1443 } 1444 1445 while (its--) { 1446 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1447 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1448 1449 /* update rhs: bb1 = bb - B*x */ 1450 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1451 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1452 1453 /* local sweep */ 1454 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1455 } 1456 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1457 if (flag & SOR_ZERO_INITIAL_GUESS) { 1458 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1459 its--; 1460 } 1461 while (its--) { 1462 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1463 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1464 1465 /* update rhs: bb1 = bb - B*x */ 1466 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1467 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1468 1469 /* local sweep */ 1470 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1471 } 1472 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1473 if (flag & 
SOR_ZERO_INITIAL_GUESS) { 1474 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1475 its--; 1476 } 1477 while (its--) { 1478 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1479 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1480 1481 /* update rhs: bb1 = bb - B*x */ 1482 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1483 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1484 1485 /* local sweep */ 1486 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1487 } 1488 } else if (flag & SOR_EISENSTAT) { 1489 Vec xx1; 1490 1491 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1492 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1493 1494 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1495 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1496 if (!mat->diag) { 1497 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1498 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1499 } 1500 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1501 if (hasop) { 1502 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1503 } else { 1504 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1505 } 1506 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1507 1508 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1509 1510 /* local sweep */ 1511 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1512 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1513 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1514 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 
1515 1516 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1517 1518 matin->factorerrortype = mat->A->factorerrortype; 1519 PetscFunctionReturn(0); 1520 } 1521 1522 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1523 { 1524 Mat aA,aB,Aperm; 1525 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1526 PetscScalar *aa,*ba; 1527 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1528 PetscSF rowsf,sf; 1529 IS parcolp = NULL; 1530 PetscBool done; 1531 PetscErrorCode ierr; 1532 1533 PetscFunctionBegin; 1534 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1535 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1536 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1537 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1538 1539 /* Invert row permutation to find out where my rows should go */ 1540 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1541 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1542 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1543 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1544 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1545 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1546 1547 /* Invert column permutation to find out where my columns should go */ 1548 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1549 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1550 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1551 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1552 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1553 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1554 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1555 1556 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1557 ierr = 
ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1558 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1559 1560 /* Find out where my gcols should go */ 1561 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1562 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1563 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1564 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1565 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1566 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1567 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1568 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1569 1570 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1571 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1572 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1573 for (i=0; i<m; i++) { 1574 PetscInt row = rdest[i],rowner; 1575 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1576 for (j=ai[i]; j<ai[i+1]; j++) { 1577 PetscInt cowner,col = cdest[aj[j]]; 1578 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1579 if (rowner == cowner) dnnz[i]++; 1580 else onnz[i]++; 1581 } 1582 for (j=bi[i]; j<bi[i+1]; j++) { 1583 PetscInt cowner,col = gcdest[bj[j]]; 1584 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1585 if (rowner == cowner) dnnz[i]++; 1586 else onnz[i]++; 1587 } 1588 } 1589 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1590 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1591 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1592 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1593 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1594 1595 ierr = 
MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1596 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1597 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1598 for (i=0; i<m; i++) { 1599 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1600 PetscInt j0,rowlen; 1601 rowlen = ai[i+1] - ai[i]; 1602 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1603 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1604 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1605 } 1606 rowlen = bi[i+1] - bi[i]; 1607 for (j0=j=0; j<rowlen; j0=j) { 1608 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1609 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1610 } 1611 } 1612 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1613 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1614 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1615 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1616 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1617 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1618 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1619 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1620 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1621 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1622 *B = Aperm; 1623 PetscFunctionReturn(0); 1624 } 1625 1626 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1627 { 1628 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1629 PetscErrorCode ierr; 1630 1631 PetscFunctionBegin; 1632 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1633 if (ghosts) *ghosts = aij->garray; 1634 PetscFunctionReturn(0); 1635 } 1636 1637 
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1638 { 1639 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1640 Mat A = mat->A,B = mat->B; 1641 PetscErrorCode ierr; 1642 PetscReal isend[5],irecv[5]; 1643 1644 PetscFunctionBegin; 1645 info->block_size = 1.0; 1646 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1647 1648 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1649 isend[3] = info->memory; isend[4] = info->mallocs; 1650 1651 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1652 1653 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1654 isend[3] += info->memory; isend[4] += info->mallocs; 1655 if (flag == MAT_LOCAL) { 1656 info->nz_used = isend[0]; 1657 info->nz_allocated = isend[1]; 1658 info->nz_unneeded = isend[2]; 1659 info->memory = isend[3]; 1660 info->mallocs = isend[4]; 1661 } else if (flag == MAT_GLOBAL_MAX) { 1662 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1663 1664 info->nz_used = irecv[0]; 1665 info->nz_allocated = irecv[1]; 1666 info->nz_unneeded = irecv[2]; 1667 info->memory = irecv[3]; 1668 info->mallocs = irecv[4]; 1669 } else if (flag == MAT_GLOBAL_SUM) { 1670 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1671 1672 info->nz_used = irecv[0]; 1673 info->nz_allocated = irecv[1]; 1674 info->nz_unneeded = irecv[2]; 1675 info->memory = irecv[3]; 1676 info->mallocs = irecv[4]; 1677 } 1678 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1679 info->fill_ratio_needed = 0; 1680 info->factor_mallocs = 0; 1681 PetscFunctionReturn(0); 1682 } 1683 1684 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1685 { 1686 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1687 PetscErrorCode ierr; 1688 1689 PetscFunctionBegin; 1690 switch (op) { 1691 case MAT_NEW_NONZERO_LOCATIONS: 1692 case 
MAT_NEW_NONZERO_ALLOCATION_ERR: 1693 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1694 case MAT_KEEP_NONZERO_PATTERN: 1695 case MAT_NEW_NONZERO_LOCATION_ERR: 1696 case MAT_USE_INODES: 1697 case MAT_IGNORE_ZERO_ENTRIES: 1698 MatCheckPreallocated(A,1); 1699 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1700 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1701 break; 1702 case MAT_ROW_ORIENTED: 1703 MatCheckPreallocated(A,1); 1704 a->roworiented = flg; 1705 1706 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1707 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1708 break; 1709 case MAT_NEW_DIAGONALS: 1710 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1711 break; 1712 case MAT_IGNORE_OFF_PROC_ENTRIES: 1713 a->donotstash = flg; 1714 break; 1715 case MAT_SPD: 1716 A->spd_set = PETSC_TRUE; 1717 A->spd = flg; 1718 if (flg) { 1719 A->symmetric = PETSC_TRUE; 1720 A->structurally_symmetric = PETSC_TRUE; 1721 A->symmetric_set = PETSC_TRUE; 1722 A->structurally_symmetric_set = PETSC_TRUE; 1723 } 1724 break; 1725 case MAT_SYMMETRIC: 1726 MatCheckPreallocated(A,1); 1727 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1728 break; 1729 case MAT_STRUCTURALLY_SYMMETRIC: 1730 MatCheckPreallocated(A,1); 1731 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1732 break; 1733 case MAT_HERMITIAN: 1734 MatCheckPreallocated(A,1); 1735 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1736 break; 1737 case MAT_SYMMETRY_ETERNAL: 1738 MatCheckPreallocated(A,1); 1739 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1740 break; 1741 case MAT_SUBMAT_SINGLEIS: 1742 A->submat_singleis = flg; 1743 break; 1744 case MAT_STRUCTURE_ONLY: 1745 /* The option is handled directly by MatSetOption() */ 1746 break; 1747 default: 1748 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1749 } 1750 PetscFunctionReturn(0); 1751 } 1752 1753 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1754 { 1755 Mat_MPIAIJ *mat = 
(Mat_MPIAIJ*)matin->data; 1756 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1757 PetscErrorCode ierr; 1758 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1759 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1760 PetscInt *cmap,*idx_p; 1761 1762 PetscFunctionBegin; 1763 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1764 mat->getrowactive = PETSC_TRUE; 1765 1766 if (!mat->rowvalues && (idx || v)) { 1767 /* 1768 allocate enough space to hold information from the longest row. 1769 */ 1770 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1771 PetscInt max = 1,tmp; 1772 for (i=0; i<matin->rmap->n; i++) { 1773 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1774 if (max < tmp) max = tmp; 1775 } 1776 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1777 } 1778 1779 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1780 lrow = row - rstart; 1781 1782 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1783 if (!v) {pvA = 0; pvB = 0;} 1784 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1785 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1786 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1787 nztot = nzA + nzB; 1788 1789 cmap = mat->garray; 1790 if (v || idx) { 1791 if (nztot) { 1792 /* Sort by increasing column numbers, assuming A and B already sorted */ 1793 PetscInt imark = -1; 1794 if (v) { 1795 *v = v_p = mat->rowvalues; 1796 for (i=0; i<nzB; i++) { 1797 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1798 else break; 1799 } 1800 imark = i; 1801 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1802 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1803 } 1804 if (idx) { 1805 *idx = idx_p = mat->rowindices; 1806 if (imark > -1) { 1807 for (i=0; i<imark; i++) { 1808 idx_p[i] = cmap[cworkB[i]]; 1809 } 1810 } else { 
1811 for (i=0; i<nzB; i++) { 1812 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1813 else break; 1814 } 1815 imark = i; 1816 } 1817 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1818 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1819 } 1820 } else { 1821 if (idx) *idx = 0; 1822 if (v) *v = 0; 1823 } 1824 } 1825 *nz = nztot; 1826 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1827 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1828 PetscFunctionReturn(0); 1829 } 1830 1831 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1832 { 1833 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1834 1835 PetscFunctionBegin; 1836 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1837 aij->getrowactive = PETSC_FALSE; 1838 PetscFunctionReturn(0); 1839 } 1840 1841 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1842 { 1843 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1844 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1845 PetscErrorCode ierr; 1846 PetscInt i,j,cstart = mat->cmap->rstart; 1847 PetscReal sum = 0.0; 1848 MatScalar *v; 1849 1850 PetscFunctionBegin; 1851 if (aij->size == 1) { 1852 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1853 } else { 1854 if (type == NORM_FROBENIUS) { 1855 v = amat->a; 1856 for (i=0; i<amat->nz; i++) { 1857 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1858 } 1859 v = bmat->a; 1860 for (i=0; i<bmat->nz; i++) { 1861 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1862 } 1863 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1864 *norm = PetscSqrtReal(*norm); 1865 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1866 } else if (type == NORM_1) { /* max column norm */ 1867 PetscReal *tmp,*tmp2; 1868 PetscInt *jj,*garray = aij->garray; 1869 
ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1870 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1871 *norm = 0.0; 1872 v = amat->a; jj = amat->j; 1873 for (j=0; j<amat->nz; j++) { 1874 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1875 } 1876 v = bmat->a; jj = bmat->j; 1877 for (j=0; j<bmat->nz; j++) { 1878 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1879 } 1880 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1881 for (j=0; j<mat->cmap->N; j++) { 1882 if (tmp2[j] > *norm) *norm = tmp2[j]; 1883 } 1884 ierr = PetscFree(tmp);CHKERRQ(ierr); 1885 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1886 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1887 } else if (type == NORM_INFINITY) { /* max row norm */ 1888 PetscReal ntemp = 0.0; 1889 for (j=0; j<aij->A->rmap->n; j++) { 1890 v = amat->a + amat->i[j]; 1891 sum = 0.0; 1892 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1893 sum += PetscAbsScalar(*v); v++; 1894 } 1895 v = bmat->a + bmat->i[j]; 1896 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1897 sum += PetscAbsScalar(*v); v++; 1898 } 1899 if (sum > ntemp) ntemp = sum; 1900 } 1901 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1902 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1903 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1904 } 1905 PetscFunctionReturn(0); 1906 } 1907 1908 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1909 { 1910 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1911 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1912 PetscErrorCode ierr; 1913 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1914 PetscInt cstart = A->cmap->rstart,ncol; 1915 Mat B; 1916 MatScalar *array; 1917 1918 PetscFunctionBegin; 1919 if (reuse == MAT_INPLACE_MATRIX && M != N) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1920 1921 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1922 ai = Aloc->i; aj = Aloc->j; 1923 bi = Bloc->i; bj = Bloc->j; 1924 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1925 PetscInt *d_nnz,*g_nnz,*o_nnz; 1926 PetscSFNode *oloc; 1927 PETSC_UNUSED PetscSF sf; 1928 1929 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1930 /* compute d_nnz for preallocation */ 1931 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1932 for (i=0; i<ai[ma]; i++) { 1933 d_nnz[aj[i]]++; 1934 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1935 } 1936 /* compute local off-diagonal contributions */ 1937 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1938 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1939 /* map those to global */ 1940 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1941 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1942 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1943 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1944 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1945 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1946 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1947 1948 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1949 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1950 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1951 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1952 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1953 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1954 } else { 1955 B = *matout; 1956 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1957 for (i=0; i<ai[ma]; 
i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1958 } 1959 1960 /* copy over the A part */ 1961 array = Aloc->a; 1962 row = A->rmap->rstart; 1963 for (i=0; i<ma; i++) { 1964 ncol = ai[i+1]-ai[i]; 1965 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1966 row++; 1967 array += ncol; aj += ncol; 1968 } 1969 aj = Aloc->j; 1970 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1971 1972 /* copy over the B part */ 1973 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1974 array = Bloc->a; 1975 row = A->rmap->rstart; 1976 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1977 cols_tmp = cols; 1978 for (i=0; i<mb; i++) { 1979 ncol = bi[i+1]-bi[i]; 1980 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1981 row++; 1982 array += ncol; cols_tmp += ncol; 1983 } 1984 ierr = PetscFree(cols);CHKERRQ(ierr); 1985 1986 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1987 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1988 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1989 *matout = B; 1990 } else { 1991 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1992 } 1993 PetscFunctionReturn(0); 1994 } 1995 1996 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1997 { 1998 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1999 Mat a = aij->A,b = aij->B; 2000 PetscErrorCode ierr; 2001 PetscInt s1,s2,s3; 2002 2003 PetscFunctionBegin; 2004 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2005 if (rr) { 2006 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2007 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2008 /* Overlap communication with computation. 
*/ 2009 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2010 } 2011 if (ll) { 2012 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2013 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2014 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2015 } 2016 /* scale the diagonal block */ 2017 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2018 2019 if (rr) { 2020 /* Do a scatter end and then right scale the off-diagonal block */ 2021 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2022 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2023 } 2024 PetscFunctionReturn(0); 2025 } 2026 2027 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2028 { 2029 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2030 PetscErrorCode ierr; 2031 2032 PetscFunctionBegin; 2033 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2034 PetscFunctionReturn(0); 2035 } 2036 2037 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2038 { 2039 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2040 Mat a,b,c,d; 2041 PetscBool flg; 2042 PetscErrorCode ierr; 2043 2044 PetscFunctionBegin; 2045 a = matA->A; b = matA->B; 2046 c = matB->A; d = matB->B; 2047 2048 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2049 if (flg) { 2050 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2051 } 2052 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2053 PetscFunctionReturn(0); 2054 } 2055 2056 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2057 { 2058 PetscErrorCode ierr; 2059 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2060 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2061 2062 PetscFunctionBegin; 2063 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2064 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2065 /* because of the column compression in the off-processor part of the matrix a->B, 2066 the number of columns in a->B and b->B may be different, hence we cannot call 2067 the MatCopy() directly on the two parts. If need be, we can provide a more 2068 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2069 then copying the submatrices */ 2070 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2071 } else { 2072 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2073 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2074 } 2075 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2076 PetscFunctionReturn(0); 2077 } 2078 2079 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2080 { 2081 PetscErrorCode ierr; 2082 2083 PetscFunctionBegin; 2084 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2085 PetscFunctionReturn(0); 2086 } 2087 2088 /* 2089 Computes the number of nonzeros per row needed for preallocation when X and Y 2090 have different nonzero structure. 
2091 */ 2092 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2093 { 2094 PetscInt i,j,k,nzx,nzy; 2095 2096 PetscFunctionBegin; 2097 /* Set the number of nonzeros in the new matrix */ 2098 for (i=0; i<m; i++) { 2099 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2100 nzx = xi[i+1] - xi[i]; 2101 nzy = yi[i+1] - yi[i]; 2102 nnz[i] = 0; 2103 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2104 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2105 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2106 nnz[i]++; 2107 } 2108 for (; k<nzy; k++) nnz[i]++; 2109 } 2110 PetscFunctionReturn(0); 2111 } 2112 2113 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2114 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2115 { 2116 PetscErrorCode ierr; 2117 PetscInt m = Y->rmap->N; 2118 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2119 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2120 2121 PetscFunctionBegin; 2122 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2123 PetscFunctionReturn(0); 2124 } 2125 2126 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2127 { 2128 PetscErrorCode ierr; 2129 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2130 PetscBLASInt bnz,one=1; 2131 Mat_SeqAIJ *x,*y; 2132 2133 PetscFunctionBegin; 2134 if (str == SAME_NONZERO_PATTERN) { 2135 PetscScalar alpha = a; 2136 x = (Mat_SeqAIJ*)xx->A->data; 2137 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2138 y = (Mat_SeqAIJ*)yy->A->data; 2139 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2140 x = (Mat_SeqAIJ*)xx->B->data; 2141 y = (Mat_SeqAIJ*)yy->B->data; 2142 ierr = 
PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2143 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2144 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2145 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2146 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2147 } else { 2148 Mat B; 2149 PetscInt *nnz_d,*nnz_o; 2150 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2151 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2152 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2153 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2154 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2155 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2156 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2157 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2158 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2159 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2160 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2161 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2162 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2163 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2164 } 2165 PetscFunctionReturn(0); 2166 } 2167 2168 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2169 2170 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2171 { 2172 #if defined(PETSC_USE_COMPLEX) 2173 PetscErrorCode ierr; 2174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2175 2176 PetscFunctionBegin; 2177 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2178 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2179 #else 2180 PetscFunctionBegin; 2181 #endif 2182 PetscFunctionReturn(0); 2183 } 2184 2185 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2186 { 2187 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2188 PetscErrorCode ierr; 2189 2190 PetscFunctionBegin; 2191 ierr = 
MatRealPart(a->A);CHKERRQ(ierr); 2192 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2193 PetscFunctionReturn(0); 2194 } 2195 2196 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2197 { 2198 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2199 PetscErrorCode ierr; 2200 2201 PetscFunctionBegin; 2202 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2203 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2204 PetscFunctionReturn(0); 2205 } 2206 2207 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2208 { 2209 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2210 PetscErrorCode ierr; 2211 PetscInt i,*idxb = 0; 2212 PetscScalar *va,*vb; 2213 Vec vtmp; 2214 2215 PetscFunctionBegin; 2216 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2217 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2218 if (idx) { 2219 for (i=0; i<A->rmap->n; i++) { 2220 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2221 } 2222 } 2223 2224 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2225 if (idx) { 2226 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2227 } 2228 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2229 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2230 2231 for (i=0; i<A->rmap->n; i++) { 2232 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2233 va[i] = vb[i]; 2234 if (idx) idx[i] = a->garray[idxb[i]]; 2235 } 2236 } 2237 2238 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2239 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2240 ierr = PetscFree(idxb);CHKERRQ(ierr); 2241 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2242 PetscFunctionReturn(0); 2243 } 2244 2245 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2246 { 2247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2248 PetscErrorCode ierr; 2249 PetscInt i,*idxb = 0; 2250 PetscScalar *va,*vb; 2251 Vec vtmp; 2252 2253 PetscFunctionBegin; 2254 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2255 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2256 if (idx) { 2257 for (i=0; i<A->cmap->n; i++) { 2258 if 
(PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2259 } 2260 } 2261 2262 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2263 if (idx) { 2264 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2265 } 2266 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2267 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2268 2269 for (i=0; i<A->rmap->n; i++) { 2270 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2271 va[i] = vb[i]; 2272 if (idx) idx[i] = a->garray[idxb[i]]; 2273 } 2274 } 2275 2276 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2277 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2278 ierr = PetscFree(idxb);CHKERRQ(ierr); 2279 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2280 PetscFunctionReturn(0); 2281 } 2282 2283 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2284 { 2285 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2286 PetscInt n = A->rmap->n; 2287 PetscInt cstart = A->cmap->rstart; 2288 PetscInt *cmap = mat->garray; 2289 PetscInt *diagIdx, *offdiagIdx; 2290 Vec diagV, offdiagV; 2291 PetscScalar *a, *diagA, *offdiagA; 2292 PetscInt r; 2293 PetscErrorCode ierr; 2294 2295 PetscFunctionBegin; 2296 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2297 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2298 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2299 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2300 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2301 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2302 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2303 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2304 for (r = 0; r < n; ++r) { 2305 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2306 a[r] = diagA[r]; 2307 idx[r] = cstart + diagIdx[r]; 2308 } else { 2309 a[r] = offdiagA[r]; 2310 idx[r] = cmap[offdiagIdx[r]]; 2311 } 2312 } 2313 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2314 ierr = 
VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2315 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2316 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2317 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2318 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2323 { 2324 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2325 PetscInt n = A->rmap->n; 2326 PetscInt cstart = A->cmap->rstart; 2327 PetscInt *cmap = mat->garray; 2328 PetscInt *diagIdx, *offdiagIdx; 2329 Vec diagV, offdiagV; 2330 PetscScalar *a, *diagA, *offdiagA; 2331 PetscInt r; 2332 PetscErrorCode ierr; 2333 2334 PetscFunctionBegin; 2335 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2336 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2337 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2338 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2339 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2340 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2341 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2342 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2343 for (r = 0; r < n; ++r) { 2344 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2345 a[r] = diagA[r]; 2346 idx[r] = cstart + diagIdx[r]; 2347 } else { 2348 a[r] = offdiagA[r]; 2349 idx[r] = cmap[offdiagIdx[r]]; 2350 } 2351 } 2352 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2353 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2354 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2355 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2356 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2357 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2358 PetscFunctionReturn(0); 2359 } 2360 2361 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2362 { 2363 PetscErrorCode ierr; 2364 Mat *dummy; 2365 2366 PetscFunctionBegin; 2367 ierr = 
MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2368 *newmat = *dummy; 2369 ierr = PetscFree(dummy);CHKERRQ(ierr); 2370 PetscFunctionReturn(0); 2371 } 2372 2373 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2374 { 2375 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2376 PetscErrorCode ierr; 2377 2378 PetscFunctionBegin; 2379 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2380 A->factorerrortype = a->A->factorerrortype; 2381 PetscFunctionReturn(0); 2382 } 2383 2384 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2385 { 2386 PetscErrorCode ierr; 2387 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2388 2389 PetscFunctionBegin; 2390 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2391 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2392 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2393 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2394 PetscFunctionReturn(0); 2395 } 2396 2397 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2398 { 2399 PetscFunctionBegin; 2400 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2401 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2402 PetscFunctionReturn(0); 2403 } 2404 2405 /*@ 2406 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2407 2408 Collective on Mat 2409 2410 Input Parameters: 2411 + A - the matrix 2412 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2413 2414 Level: advanced 2415 2416 @*/ 2417 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2418 { 2419 PetscErrorCode ierr; 2420 2421 PetscFunctionBegin; 2422 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2423 PetscFunctionReturn(0); 2424 } 2425 2426 PetscErrorCode 
MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2427 { 2428 PetscErrorCode ierr; 2429 PetscBool sc = PETSC_FALSE,flg; 2430 2431 PetscFunctionBegin; 2432 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2433 ierr = PetscObjectOptionsBegin((PetscObject)A); 2434 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2435 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2436 if (flg) { 2437 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2438 } 2439 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2440 PetscFunctionReturn(0); 2441 } 2442 2443 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2444 { 2445 PetscErrorCode ierr; 2446 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2447 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2448 2449 PetscFunctionBegin; 2450 if (!Y->preallocated) { 2451 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2452 } else if (!aij->nz) { 2453 PetscInt nonew = aij->nonew; 2454 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2455 aij->nonew = nonew; 2456 } 2457 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2458 PetscFunctionReturn(0); 2459 } 2460 2461 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2462 { 2463 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2464 PetscErrorCode ierr; 2465 2466 PetscFunctionBegin; 2467 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2468 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2469 if (d) { 2470 PetscInt rstart; 2471 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2472 *d += rstart; 2473 2474 } 2475 PetscFunctionReturn(0); 2476 } 2477 2478 2479 /* -------------------------------------------------------------------*/ 2480 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2481 
MatGetRow_MPIAIJ, 2482 MatRestoreRow_MPIAIJ, 2483 MatMult_MPIAIJ, 2484 /* 4*/ MatMultAdd_MPIAIJ, 2485 MatMultTranspose_MPIAIJ, 2486 MatMultTransposeAdd_MPIAIJ, 2487 0, 2488 0, 2489 0, 2490 /*10*/ 0, 2491 0, 2492 0, 2493 MatSOR_MPIAIJ, 2494 MatTranspose_MPIAIJ, 2495 /*15*/ MatGetInfo_MPIAIJ, 2496 MatEqual_MPIAIJ, 2497 MatGetDiagonal_MPIAIJ, 2498 MatDiagonalScale_MPIAIJ, 2499 MatNorm_MPIAIJ, 2500 /*20*/ MatAssemblyBegin_MPIAIJ, 2501 MatAssemblyEnd_MPIAIJ, 2502 MatSetOption_MPIAIJ, 2503 MatZeroEntries_MPIAIJ, 2504 /*24*/ MatZeroRows_MPIAIJ, 2505 0, 2506 0, 2507 0, 2508 0, 2509 /*29*/ MatSetUp_MPIAIJ, 2510 0, 2511 0, 2512 MatGetDiagonalBlock_MPIAIJ, 2513 0, 2514 /*34*/ MatDuplicate_MPIAIJ, 2515 0, 2516 0, 2517 0, 2518 0, 2519 /*39*/ MatAXPY_MPIAIJ, 2520 MatCreateSubMatrices_MPIAIJ, 2521 MatIncreaseOverlap_MPIAIJ, 2522 MatGetValues_MPIAIJ, 2523 MatCopy_MPIAIJ, 2524 /*44*/ MatGetRowMax_MPIAIJ, 2525 MatScale_MPIAIJ, 2526 MatShift_MPIAIJ, 2527 MatDiagonalSet_MPIAIJ, 2528 MatZeroRowsColumns_MPIAIJ, 2529 /*49*/ MatSetRandom_MPIAIJ, 2530 0, 2531 0, 2532 0, 2533 0, 2534 /*54*/ MatFDColoringCreate_MPIXAIJ, 2535 0, 2536 MatSetUnfactored_MPIAIJ, 2537 MatPermute_MPIAIJ, 2538 0, 2539 /*59*/ MatCreateSubMatrix_MPIAIJ, 2540 MatDestroy_MPIAIJ, 2541 MatView_MPIAIJ, 2542 0, 2543 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2544 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2545 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2546 0, 2547 0, 2548 0, 2549 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2550 MatGetRowMinAbs_MPIAIJ, 2551 0, 2552 0, 2553 0, 2554 0, 2555 /*75*/ MatFDColoringApply_AIJ, 2556 MatSetFromOptions_MPIAIJ, 2557 0, 2558 0, 2559 MatFindZeroDiagonals_MPIAIJ, 2560 /*80*/ 0, 2561 0, 2562 0, 2563 /*83*/ MatLoad_MPIAIJ, 2564 MatIsSymmetric_MPIAIJ, 2565 0, 2566 0, 2567 0, 2568 0, 2569 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2570 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2571 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2572 MatPtAP_MPIAIJ_MPIAIJ, 2573 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2574 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 
2575 0, 2576 0, 2577 0, 2578 0, 2579 /*99*/ 0, 2580 0, 2581 0, 2582 MatConjugate_MPIAIJ, 2583 0, 2584 /*104*/MatSetValuesRow_MPIAIJ, 2585 MatRealPart_MPIAIJ, 2586 MatImaginaryPart_MPIAIJ, 2587 0, 2588 0, 2589 /*109*/0, 2590 0, 2591 MatGetRowMin_MPIAIJ, 2592 0, 2593 MatMissingDiagonal_MPIAIJ, 2594 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2595 0, 2596 MatGetGhosts_MPIAIJ, 2597 0, 2598 0, 2599 /*119*/0, 2600 0, 2601 0, 2602 0, 2603 MatGetMultiProcBlock_MPIAIJ, 2604 /*124*/MatFindNonzeroRows_MPIAIJ, 2605 MatGetColumnNorms_MPIAIJ, 2606 MatInvertBlockDiagonal_MPIAIJ, 2607 0, 2608 MatCreateSubMatricesMPI_MPIAIJ, 2609 /*129*/0, 2610 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2611 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2612 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2613 0, 2614 /*134*/0, 2615 0, 2616 MatRARt_MPIAIJ_MPIAIJ, 2617 0, 2618 0, 2619 /*139*/MatSetBlockSizes_MPIAIJ, 2620 0, 2621 0, 2622 MatFDColoringSetUp_MPIXAIJ, 2623 MatFindOffBlockDiagonalEntries_MPIAIJ, 2624 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2625 }; 2626 2627 /* ----------------------------------------------------------------------------------------*/ 2628 2629 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2630 { 2631 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2632 PetscErrorCode ierr; 2633 2634 PetscFunctionBegin; 2635 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2636 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2637 PetscFunctionReturn(0); 2638 } 2639 2640 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2641 { 2642 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2643 PetscErrorCode ierr; 2644 2645 PetscFunctionBegin; 2646 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2647 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2648 PetscFunctionReturn(0); 2649 } 2650 2651 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2652 { 2653 Mat_MPIAIJ *b; 2654 PetscErrorCode ierr; 2655 2656 PetscFunctionBegin; 2657 ierr = 
PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2658 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2659 b = (Mat_MPIAIJ*)B->data; 2660 2661 #if defined(PETSC_USE_CTABLE) 2662 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2663 #else 2664 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2665 #endif 2666 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2667 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2668 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2669 2670 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2671 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2672 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2673 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2674 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2675 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2676 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2677 2678 if (!B->preallocated) { 2679 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2680 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2681 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2682 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2683 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2684 } 2685 2686 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2687 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2688 B->preallocated = PETSC_TRUE; 2689 B->was_assembled = PETSC_FALSE; 2690 B->assembled = PETSC_FALSE;; 2691 PetscFunctionReturn(0); 2692 } 2693 2694 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2695 { 2696 Mat_MPIAIJ *b; 2697 PetscErrorCode ierr; 2698 2699 PetscFunctionBegin; 2700 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2701 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2702 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2703 b = (Mat_MPIAIJ*)B->data; 2704 2705 #if defined(PETSC_USE_CTABLE) 2706 ierr = 
PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2707 #else 2708 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2709 #endif 2710 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2711 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2712 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2713 2714 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2715 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2716 B->preallocated = PETSC_TRUE; 2717 B->was_assembled = PETSC_FALSE; 2718 B->assembled = PETSC_FALSE; 2719 PetscFunctionReturn(0); 2720 } 2721 2722 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2723 { 2724 Mat mat; 2725 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2726 PetscErrorCode ierr; 2727 2728 PetscFunctionBegin; 2729 *newmat = 0; 2730 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2731 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2732 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2733 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2734 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2735 a = (Mat_MPIAIJ*)mat->data; 2736 2737 mat->factortype = matin->factortype; 2738 mat->assembled = PETSC_TRUE; 2739 mat->insertmode = NOT_SET_VALUES; 2740 mat->preallocated = PETSC_TRUE; 2741 2742 a->size = oldmat->size; 2743 a->rank = oldmat->rank; 2744 a->donotstash = oldmat->donotstash; 2745 a->roworiented = oldmat->roworiented; 2746 a->rowindices = 0; 2747 a->rowvalues = 0; 2748 a->getrowactive = PETSC_FALSE; 2749 2750 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2751 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2752 2753 if (oldmat->colmap) { 2754 #if defined(PETSC_USE_CTABLE) 2755 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2756 #else 2757 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2758 ierr = 
PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2759 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2760 #endif 2761 } else a->colmap = 0; 2762 if (oldmat->garray) { 2763 PetscInt len; 2764 len = oldmat->B->cmap->n; 2765 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2766 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2767 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2768 } else a->garray = 0; 2769 2770 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2771 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2772 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2773 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2774 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2775 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2776 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2777 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2778 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2779 *newmat = mat; 2780 PetscFunctionReturn(0); 2781 } 2782 2783 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2784 { 2785 PetscScalar *vals,*svals; 2786 MPI_Comm comm; 2787 PetscErrorCode ierr; 2788 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2789 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2790 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2791 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2792 PetscInt cend,cstart,n,*rowners; 2793 int fd; 2794 PetscInt bs = newMat->rmap->bs; 2795 2796 PetscFunctionBegin; 2797 /* force binary viewer to load .info file if it has not yet done so */ 2798 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2799 
ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2800 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2801 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2802 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2803 if (!rank) { 2804 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2805 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2806 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2807 } 2808 2809 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2810 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2811 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2812 if (bs < 0) bs = 1; 2813 2814 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2815 M = header[1]; N = header[2]; 2816 2817 /* If global sizes are set, check if they are consistent with that given in the file */ 2818 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2819 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2820 2821 /* determine ownership of all (block) rows */ 2822 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2823 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2824 else m = newMat->rmap->n; /* Set by user */ 2825 2826 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2827 ierr = 
MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2828 2829 /* First process needs enough room for process with most rows */ 2830 if (!rank) { 2831 mmax = rowners[1]; 2832 for (i=2; i<=size; i++) { 2833 mmax = PetscMax(mmax, rowners[i]); 2834 } 2835 } else mmax = -1; /* unused, but compilers complain */ 2836 2837 rowners[0] = 0; 2838 for (i=2; i<=size; i++) { 2839 rowners[i] += rowners[i-1]; 2840 } 2841 rstart = rowners[rank]; 2842 rend = rowners[rank+1]; 2843 2844 /* distribute row lengths to all processors */ 2845 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2846 if (!rank) { 2847 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2848 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2849 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2850 for (j=0; j<m; j++) { 2851 procsnz[0] += ourlens[j]; 2852 } 2853 for (i=1; i<size; i++) { 2854 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2855 /* calculate the number of nonzeros on each processor */ 2856 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2857 procsnz[i] += rowlengths[j]; 2858 } 2859 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2860 } 2861 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2862 } else { 2863 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2864 } 2865 2866 if (!rank) { 2867 /* determine max buffer needed and allocate it */ 2868 maxnz = 0; 2869 for (i=0; i<size; i++) { 2870 maxnz = PetscMax(maxnz,procsnz[i]); 2871 } 2872 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2873 2874 /* read in my part of the matrix column indices */ 2875 nz = procsnz[0]; 2876 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2877 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2878 2879 /* read in every one elses and ship off */ 2880 for (i=1; i<size; i++) { 2881 nz = procsnz[i]; 2882 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2883 ierr = 
MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2884 } 2885 ierr = PetscFree(cols);CHKERRQ(ierr); 2886 } else { 2887 /* determine buffer space needed for message */ 2888 nz = 0; 2889 for (i=0; i<m; i++) { 2890 nz += ourlens[i]; 2891 } 2892 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2893 2894 /* receive message of column indices*/ 2895 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2896 } 2897 2898 /* determine column ownership if matrix is not square */ 2899 if (N != M) { 2900 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2901 else n = newMat->cmap->n; 2902 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2903 cstart = cend - n; 2904 } else { 2905 cstart = rstart; 2906 cend = rend; 2907 n = cend - cstart; 2908 } 2909 2910 /* loop over local rows, determining number of off diagonal entries */ 2911 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2912 jj = 0; 2913 for (i=0; i<m; i++) { 2914 for (j=0; j<ourlens[i]; j++) { 2915 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2916 jj++; 2917 } 2918 } 2919 2920 for (i=0; i<m; i++) { 2921 ourlens[i] -= offlens[i]; 2922 } 2923 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2924 2925 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2926 2927 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2928 2929 for (i=0; i<m; i++) { 2930 ourlens[i] += offlens[i]; 2931 } 2932 2933 if (!rank) { 2934 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2935 2936 /* read in my part of the matrix numerical values */ 2937 nz = procsnz[0]; 2938 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2939 2940 /* insert into matrix */ 2941 jj = rstart; 2942 smycols = mycols; 2943 svals = vals; 2944 for (i=0; i<m; i++) { 2945 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2946 smycols += ourlens[i]; 2947 svals += ourlens[i]; 2948 jj++; 2949 } 2950 2951 /* read 
in other processors and ship out */ 2952 for (i=1; i<size; i++) { 2953 nz = procsnz[i]; 2954 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2955 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2956 } 2957 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2958 } else { 2959 /* receive numeric values */ 2960 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 2961 2962 /* receive message of values*/ 2963 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2964 2965 /* insert into matrix */ 2966 jj = rstart; 2967 smycols = mycols; 2968 svals = vals; 2969 for (i=0; i<m; i++) { 2970 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2971 smycols += ourlens[i]; 2972 svals += ourlens[i]; 2973 jj++; 2974 } 2975 } 2976 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2977 ierr = PetscFree(vals);CHKERRQ(ierr); 2978 ierr = PetscFree(mycols);CHKERRQ(ierr); 2979 ierr = PetscFree(rowners);CHKERRQ(ierr); 2980 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2981 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2982 PetscFunctionReturn(0); 2983 } 2984 2985 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 2986 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2987 { 2988 PetscErrorCode ierr; 2989 IS iscol_local; 2990 PetscBool isstride; 2991 PetscMPIInt lisstride=0,gisstride; 2992 2993 PetscFunctionBegin; 2994 /* check if we are grabbing all columns*/ 2995 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 2996 2997 if (isstride) { 2998 PetscInt start,len,mstart,mlen; 2999 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3000 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3001 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3002 if (mstart == start && mlen-mstart == len) lisstride = 1; 3003 } 3004 3005 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3006 if (gisstride) { 3007 PetscInt N; 3008 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3009 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3010 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3011 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3012 } else { 3013 PetscInt cbs; 3014 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3015 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3016 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3017 } 3018 3019 *isseq = iscol_local; 3020 PetscFunctionReturn(0); 3021 } 3022 3023 /* 3024 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3025 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3026 3027 Input Parameters: 3028 mat - matrix 3029 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3030 i.e., mat->rstart <= isrow[i] < mat->rend 3031 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3032 i.e., mat->cstart <= iscol[i] < mat->cend 3033 Output Parameter: 3034 isrow_d,iscol_d - sequential 
row and column index sets for retrieving mat->A 3035 iscol_o - sequential column index set for retrieving mat->B 3036 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3037 */ 3038 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3039 { 3040 PetscErrorCode ierr; 3041 Vec x,cmap; 3042 const PetscInt *is_idx; 3043 PetscScalar *xarray,*cmaparray; 3044 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3045 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3046 Mat B=a->B; 3047 Vec lvec=a->lvec,lcmap; 3048 PetscInt i,cstart,cend,Bn=B->cmap->N; 3049 MPI_Comm comm; 3050 VecScatter Mvctx=a->Mvctx; 3051 3052 PetscFunctionBegin; 3053 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3054 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3055 3056 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3057 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3058 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3059 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3060 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3061 3062 /* Get start indices */ 3063 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3064 isstart -= ncols; 3065 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3066 3067 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3068 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3069 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3070 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3071 for (i=0; i<ncols; i++) { 3072 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3073 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3074 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3075 } 3076 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3077 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3078 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3079 3080 /* Get 
iscol_d */ 3081 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3082 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3083 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3084 3085 /* Get isrow_d */ 3086 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3087 rstart = mat->rmap->rstart; 3088 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3089 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3090 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3091 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3092 3093 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3094 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3095 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3096 3097 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3098 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3099 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3100 3101 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3102 3103 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3104 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3105 3106 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3107 /* off-process column indices */ 3108 count = 0; 3109 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3110 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3111 3112 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3113 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3114 for (i=0; i<Bn; i++) { 3115 if (PetscRealPart(xarray[i]) > -1.0) { 3116 idx[count] = i; /* local column index in off-diagonal part B */ 3117 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3118 count++; 3119 } 3120 } 3121 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3122 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3123 
3124 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3125 /* cannot ensure iscol_o has same blocksize as iscol! */ 3126 3127 ierr = PetscFree(idx);CHKERRQ(ierr); 3128 *garray = cmap1; 3129 3130 ierr = VecDestroy(&x);CHKERRQ(ierr); 3131 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3132 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3133 PetscFunctionReturn(0); 3134 } 3135 3136 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3137 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3138 { 3139 PetscErrorCode ierr; 3140 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3141 Mat M = NULL; 3142 MPI_Comm comm; 3143 IS iscol_d,isrow_d,iscol_o; 3144 Mat Asub = NULL,Bsub = NULL; 3145 PetscInt n; 3146 3147 PetscFunctionBegin; 3148 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3149 3150 if (call == MAT_REUSE_MATRIX) { 3151 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3152 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3153 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3154 3155 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3156 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3157 3158 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3159 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3160 3161 /* Update diagonal and off-diagonal portions of submat */ 3162 asub = (Mat_MPIAIJ*)(*submat)->data; 3163 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3164 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 
3165 if (n) { 3166 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3167 } 3168 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3169 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3170 3171 } else { /* call == MAT_INITIAL_MATRIX) */ 3172 const PetscInt *garray; 3173 PetscInt BsubN; 3174 3175 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3176 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3177 3178 /* Create local submatrices Asub and Bsub */ 3179 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3180 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3181 3182 /* Create submatrix M */ 3183 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3184 3185 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3186 asub = (Mat_MPIAIJ*)M->data; 3187 3188 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3189 n = asub->B->cmap->N; 3190 if (BsubN > n) { 3191 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3192 const PetscInt *idx; 3193 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3194 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3195 3196 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3197 j = 0; 3198 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3199 for (i=0; i<n; i++) { 3200 if (j >= BsubN) break; 3201 while (subgarray[i] > garray[j]) j++; 3202 3203 if (subgarray[i] == garray[j]) { 3204 idx_new[i] = idx[j++]; 3205 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3206 } 3207 ierr = 
ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3208 3209 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3210 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3211 3212 } else if (BsubN < n) { 3213 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3214 } 3215 3216 ierr = PetscFree(garray);CHKERRQ(ierr); 3217 *submat = M; 3218 3219 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3220 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3221 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3222 3223 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3224 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3225 3226 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3227 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3228 } 3229 PetscFunctionReturn(0); 3230 } 3231 3232 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3233 { 3234 PetscErrorCode ierr; 3235 IS iscol_local=NULL,isrow_d; 3236 PetscInt csize; 3237 PetscInt n,i,j,start,end; 3238 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3239 MPI_Comm comm; 3240 3241 PetscFunctionBegin; 3242 /* If isrow has same processor distribution as mat, 3243 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3244 if (call == MAT_REUSE_MATRIX) { 3245 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3246 if (isrow_d) { 3247 sameRowDist = PETSC_TRUE; 3248 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3249 } else { 3250 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3251 if (iscol_local) { 3252 sameRowDist = PETSC_TRUE; 3253 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3254 } 3255 } 3256 } else { 3257 /* Check if 
isrow has same processor distribution as mat */ 3258 sameDist[0] = PETSC_FALSE; 3259 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3260 if (!n) { 3261 sameDist[0] = PETSC_TRUE; 3262 } else { 3263 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3264 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3265 if (i >= start && j < end) { 3266 sameDist[0] = PETSC_TRUE; 3267 } 3268 } 3269 3270 /* Check if iscol has same processor distribution as mat */ 3271 sameDist[1] = PETSC_FALSE; 3272 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3273 if (!n) { 3274 sameDist[1] = PETSC_TRUE; 3275 } else { 3276 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3277 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3278 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3279 } 3280 3281 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3282 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3283 sameRowDist = tsameDist[0]; 3284 } 3285 3286 if (sameRowDist) { 3287 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3288 /* isrow and iscol have same processor distribution as mat */ 3289 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3290 PetscFunctionReturn(0); 3291 } else { /* sameRowDist */ 3292 /* isrow has same processor distribution as mat */ 3293 if (call == MAT_INITIAL_MATRIX) { 3294 PetscBool sorted; 3295 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3296 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3297 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3298 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3299 3300 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3301 if (sorted) { 3302 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3303 ierr = 
MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3304 PetscFunctionReturn(0); 3305 } 3306 } else { /* call == MAT_REUSE_MATRIX */ 3307 IS iscol_sub; 3308 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3309 if (iscol_sub) { 3310 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3311 PetscFunctionReturn(0); 3312 } 3313 } 3314 } 3315 } 3316 3317 /* General case: iscol -> iscol_local which has global size of iscol */ 3318 if (call == MAT_REUSE_MATRIX) { 3319 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3320 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3321 } else { 3322 if (!iscol_local) { 3323 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3324 } 3325 } 3326 3327 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3328 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3329 3330 if (call == MAT_INITIAL_MATRIX) { 3331 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3332 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3333 } 3334 PetscFunctionReturn(0); 3335 } 3336 3337 /*@C 3338 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3339 and "off-diagonal" part of the matrix in CSR format. 3340 3341 Collective on MPI_Comm 3342 3343 Input Parameters: 3344 + comm - MPI communicator 3345 . A - "diagonal" portion of matrix 3346 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3347 - garray - global index of B columns 3348 3349 Output Parameter: 3350 . 
mat - the matrix, with input A as its local diagonal matrix 3351 Level: advanced 3352 3353 Notes: 3354 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3355 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3356 3357 .seealso: MatCreateMPIAIJWithSplitArrays() 3358 @*/ 3359 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3360 { 3361 PetscErrorCode ierr; 3362 Mat_MPIAIJ *maij; 3363 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3364 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3365 PetscScalar *oa=b->a; 3366 Mat Bnew; 3367 PetscInt m,n,N; 3368 3369 PetscFunctionBegin; 3370 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3371 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3372 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3373 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3374 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3375 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3376 3377 /* Get global columns of mat */ 3378 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3379 3380 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3381 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3382 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3383 maij = (Mat_MPIAIJ*)(*mat)->data; 3384 3385 (*mat)->preallocated = PETSC_TRUE; 3386 3387 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3388 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3389 3390 /* Set A as diagonal portion of *mat */ 3391 maij->A = A; 3392 3393 nz = oi[m]; 3394 for (i=0; i<nz; i++) { 3395 col = oj[i]; 3396 oj[i] 
= garray[col]; 3397 } 3398 3399 /* Set Bnew as off-diagonal portion of *mat */ 3400 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3401 bnew = (Mat_SeqAIJ*)Bnew->data; 3402 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3403 maij->B = Bnew; 3404 3405 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3406 3407 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3408 b->free_a = PETSC_FALSE; 3409 b->free_ij = PETSC_FALSE; 3410 ierr = MatDestroy(&B);CHKERRQ(ierr); 3411 3412 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3413 bnew->free_a = PETSC_TRUE; 3414 bnew->free_ij = PETSC_TRUE; 3415 3416 /* condense columns of maij->B */ 3417 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3418 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3419 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3420 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3421 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3422 PetscFunctionReturn(0); 3423 } 3424 3425 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3426 3427 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3428 { 3429 PetscErrorCode ierr; 3430 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3431 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3432 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3433 Mat M,Msub,B=a->B; 3434 MatScalar *aa; 3435 Mat_SeqAIJ *aij; 3436 PetscInt *garray = a->garray,*colsub,Ncols; 3437 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3438 IS iscol_sub,iscmap; 3439 const PetscInt *is_idx,*cmap; 3440 PetscBool allcolumns=PETSC_FALSE; 3441 MPI_Comm comm; 3442 3443 
PetscFunctionBegin; 3444 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3445 3446 if (call == MAT_REUSE_MATRIX) { 3447 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3448 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3449 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3450 3451 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3452 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3453 3454 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3455 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3456 3457 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3458 3459 } else { /* call == MAT_INITIAL_MATRIX) */ 3460 PetscBool flg; 3461 3462 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3463 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3464 3465 /* (1) iscol -> nonscalable iscol_local */ 3466 /* Check for special case: each processor gets entire matrix columns */ 3467 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3468 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3469 if (allcolumns) { 3470 iscol_sub = iscol_local; 3471 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3472 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3473 3474 } else { 3475 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3476 PetscInt *idx,*cmap1,k; 3477 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3478 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3479 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3480 count = 0; 3481 k = 0; 3482 for (i=0; i<Ncols; i++) { 3483 j = is_idx[i]; 3484 if (j >= cstart && j < cend) { 3485 /* diagonal part of mat */ 3486 idx[count] = j; 3487 cmap1[count++] = i; /* column index in submat */ 3488 } else if (Bn) { 3489 /* off-diagonal part of mat */ 3490 if (j == garray[k]) { 3491 idx[count] = j; 3492 cmap1[count++] = i; /* column index in submat */ 3493 } else if (j > garray[k]) { 3494 while (j > garray[k] && k < Bn-1) k++; 3495 if (j == garray[k]) { 3496 idx[count] = j; 3497 cmap1[count++] = i; /* column index in submat */ 3498 } 3499 } 3500 } 3501 } 3502 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3503 3504 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3505 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3506 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3507 3508 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3509 } 3510 3511 /* (3) Create sequential Msub */ 3512 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3513 } 3514 3515 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3516 aij = (Mat_SeqAIJ*)(Msub)->data; 3517 ii = aij->i; 3518 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3519 3520 /* 3521 m - number of local rows 3522 Ncols - number of columns (same on all processors) 3523 rstart - first row in new global matrix generated 3524 */ 3525 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3526 3527 if (call == MAT_INITIAL_MATRIX) { 3528 /* (4) Create parallel newmat */ 3529 PetscMPIInt rank,size; 3530 PetscInt csize; 3531 3532 
ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3533 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3534 3535 /* 3536 Determine the number of non-zeros in the diagonal and off-diagonal 3537 portions of the matrix in order to do correct preallocation 3538 */ 3539 3540 /* first get start and end of "diagonal" columns */ 3541 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3542 if (csize == PETSC_DECIDE) { 3543 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3544 if (mglobal == Ncols) { /* square matrix */ 3545 nlocal = m; 3546 } else { 3547 nlocal = Ncols/size + ((Ncols % size) > rank); 3548 } 3549 } else { 3550 nlocal = csize; 3551 } 3552 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3553 rstart = rend - nlocal; 3554 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3555 3556 /* next, compute all the lengths */ 3557 jj = aij->j; 3558 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3559 olens = dlens + m; 3560 for (i=0; i<m; i++) { 3561 jend = ii[i+1] - ii[i]; 3562 olen = 0; 3563 dlen = 0; 3564 for (j=0; j<jend; j++) { 3565 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3566 else dlen++; 3567 jj++; 3568 } 3569 olens[i] = olen; 3570 dlens[i] = dlen; 3571 } 3572 3573 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3574 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3575 3576 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3577 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3578 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3579 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3580 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3581 ierr = PetscFree(dlens);CHKERRQ(ierr); 3582 3583 } else { /* call == MAT_REUSE_MATRIX */ 3584 M = *newmat; 3585 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3586 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be 
same size/layout as request"); 3587 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3588 /* 3589 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3590 rather than the slower MatSetValues(). 3591 */ 3592 M->was_assembled = PETSC_TRUE; 3593 M->assembled = PETSC_FALSE; 3594 } 3595 3596 /* (5) Set values of Msub to *newmat */ 3597 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3598 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3599 3600 jj = aij->j; 3601 aa = aij->a; 3602 for (i=0; i<m; i++) { 3603 row = rstart + i; 3604 nz = ii[i+1] - ii[i]; 3605 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3606 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3607 jj += nz; aa += nz; 3608 } 3609 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3610 3611 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3612 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3613 3614 ierr = PetscFree(colsub);CHKERRQ(ierr); 3615 3616 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3617 if (call == MAT_INITIAL_MATRIX) { 3618 *newmat = M; 3619 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3620 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3621 3622 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3623 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3624 3625 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3626 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3627 3628 if (iscol_local) { 3629 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3630 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3631 } 3632 } 3633 PetscFunctionReturn(0); 3634 } 3635 3636 /* 3637 Not great since it makes two copies of the submatrix, first an SeqAIJ 3638 in local and then by concatenating the local matrices the end result. 
3639 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3640 3641 Note: This requires a sequential iscol with all indices. 3642 */ 3643 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3644 { 3645 PetscErrorCode ierr; 3646 PetscMPIInt rank,size; 3647 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3648 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3649 Mat M,Mreuse; 3650 MatScalar *aa,*vwork; 3651 MPI_Comm comm; 3652 Mat_SeqAIJ *aij; 3653 PetscBool colflag,allcolumns=PETSC_FALSE; 3654 3655 PetscFunctionBegin; 3656 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3657 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3658 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3659 3660 /* Check for special case: each processor gets entire matrix columns */ 3661 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3662 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3663 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3664 3665 if (call == MAT_REUSE_MATRIX) { 3666 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3667 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3668 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3669 } else { 3670 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3671 } 3672 3673 /* 3674 m - number of local rows 3675 n - number of columns (same on all processors) 3676 rstart - first row in new global matrix generated 3677 */ 3678 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3679 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3680 if (call == MAT_INITIAL_MATRIX) { 3681 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3682 ii = aij->i; 3683 jj = aij->j; 3684 3685 /* 3686 
Determine the number of non-zeros in the diagonal and off-diagonal 3687 portions of the matrix in order to do correct preallocation 3688 */ 3689 3690 /* first get start and end of "diagonal" columns */ 3691 if (csize == PETSC_DECIDE) { 3692 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3693 if (mglobal == n) { /* square matrix */ 3694 nlocal = m; 3695 } else { 3696 nlocal = n/size + ((n % size) > rank); 3697 } 3698 } else { 3699 nlocal = csize; 3700 } 3701 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3702 rstart = rend - nlocal; 3703 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3704 3705 /* next, compute all the lengths */ 3706 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3707 olens = dlens + m; 3708 for (i=0; i<m; i++) { 3709 jend = ii[i+1] - ii[i]; 3710 olen = 0; 3711 dlen = 0; 3712 for (j=0; j<jend; j++) { 3713 if (*jj < rstart || *jj >= rend) olen++; 3714 else dlen++; 3715 jj++; 3716 } 3717 olens[i] = olen; 3718 dlens[i] = dlen; 3719 } 3720 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3721 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3722 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3723 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3724 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3725 ierr = PetscFree(dlens);CHKERRQ(ierr); 3726 } else { 3727 PetscInt ml,nl; 3728 3729 M = *newmat; 3730 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3731 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3732 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3733 /* 3734 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3735 rather than the slower MatSetValues(). 
3736 */ 3737 M->was_assembled = PETSC_TRUE; 3738 M->assembled = PETSC_FALSE; 3739 } 3740 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3741 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3742 ii = aij->i; 3743 jj = aij->j; 3744 aa = aij->a; 3745 for (i=0; i<m; i++) { 3746 row = rstart + i; 3747 nz = ii[i+1] - ii[i]; 3748 cwork = jj; jj += nz; 3749 vwork = aa; aa += nz; 3750 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3751 } 3752 3753 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3754 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3755 *newmat = M; 3756 3757 /* save submatrix used in processor for next request */ 3758 if (call == MAT_INITIAL_MATRIX) { 3759 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3760 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3761 } 3762 PetscFunctionReturn(0); 3763 } 3764 3765 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3766 { 3767 PetscInt m,cstart, cend,j,nnz,i,d; 3768 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3769 const PetscInt *JJ; 3770 PetscScalar *values; 3771 PetscErrorCode ierr; 3772 PetscBool nooffprocentries; 3773 3774 PetscFunctionBegin; 3775 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3776 3777 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3778 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3779 m = B->rmap->n; 3780 cstart = B->cmap->rstart; 3781 cend = B->cmap->rend; 3782 rstart = B->rmap->rstart; 3783 3784 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3785 3786 #if defined(PETSC_USE_DEBUGGING) 3787 for (i=0; i<m; i++) { 3788 nnz = Ii[i+1]- Ii[i]; 3789 JJ = J + Ii[i]; 3790 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3791 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative 
column index",i,j); 3792 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3793 } 3794 #endif 3795 3796 for (i=0; i<m; i++) { 3797 nnz = Ii[i+1]- Ii[i]; 3798 JJ = J + Ii[i]; 3799 nnz_max = PetscMax(nnz_max,nnz); 3800 d = 0; 3801 for (j=0; j<nnz; j++) { 3802 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3803 } 3804 d_nnz[i] = d; 3805 o_nnz[i] = nnz - d; 3806 } 3807 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3808 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3809 3810 if (v) values = (PetscScalar*)v; 3811 else { 3812 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3813 } 3814 3815 for (i=0; i<m; i++) { 3816 ii = i + rstart; 3817 nnz = Ii[i+1]- Ii[i]; 3818 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3819 } 3820 nooffprocentries = B->nooffprocentries; 3821 B->nooffprocentries = PETSC_TRUE; 3822 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3823 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3824 B->nooffprocentries = nooffprocentries; 3825 3826 if (!v) { 3827 ierr = PetscFree(values);CHKERRQ(ierr); 3828 } 3829 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3830 PetscFunctionReturn(0); 3831 } 3832 3833 /*@ 3834 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3835 (the default parallel PETSc format). 3836 3837 Collective on MPI_Comm 3838 3839 Input Parameters: 3840 + B - the matrix 3841 . i - the indices into j for the start of each local row (starts with zero) 3842 . 
j - the column indices for each local row (starts with zero) 3843 - v - optional values in the matrix 3844 3845 Level: developer 3846 3847 Notes: 3848 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3849 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3850 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3851 3852 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3853 3854 The format which is used for the sparse matrix input, is equivalent to a 3855 row-major ordering.. i.e for the following matrix, the input data expected is 3856 as shown 3857 3858 $ 1 0 0 3859 $ 2 0 3 P0 3860 $ ------- 3861 $ 4 5 6 P1 3862 $ 3863 $ Process0 [P0]: rows_owned=[0,1] 3864 $ i = {0,1,3} [size = nrow+1 = 2+1] 3865 $ j = {0,0,2} [size = 3] 3866 $ v = {1,2,3} [size = 3] 3867 $ 3868 $ Process1 [P1]: rows_owned=[2] 3869 $ i = {0,3} [size = nrow+1 = 1+1] 3870 $ j = {0,1,2} [size = 3] 3871 $ v = {4,5,6} [size = 3] 3872 3873 .keywords: matrix, aij, compressed row, sparse, parallel 3874 3875 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3876 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3877 @*/ 3878 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3879 { 3880 PetscErrorCode ierr; 3881 3882 PetscFunctionBegin; 3883 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3884 PetscFunctionReturn(0); 3885 } 3886 3887 /*@C 3888 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3889 (the default parallel PETSc format). 
For good matrix assembly performance 3890 the user should preallocate the matrix storage by setting the parameters 3891 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3892 performance can be increased by more than a factor of 50. 3893 3894 Collective on MPI_Comm 3895 3896 Input Parameters: 3897 + B - the matrix 3898 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3899 (same value is used for all local rows) 3900 . d_nnz - array containing the number of nonzeros in the various rows of the 3901 DIAGONAL portion of the local submatrix (possibly different for each row) 3902 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3903 The size of this array is equal to the number of local rows, i.e 'm'. 3904 For matrices that will be factored, you must leave room for (and set) 3905 the diagonal entry even if it is zero. 3906 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3907 submatrix (same value is used for all local rows). 3908 - o_nnz - array containing the number of nonzeros in the various rows of the 3909 OFF-DIAGONAL portion of the local submatrix (possibly different for 3910 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3911 structure. The size of this array is equal to the number 3912 of local rows, i.e 'm'. 3913 3914 If the *_nnz parameter is given then the *_nz parameter is ignored 3915 3916 The AIJ format (also called the Yale sparse matrix format or 3917 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3918 storage. The stored row and column indices begin with zero. 3919 See Users-Manual: ch_mat for details. 3920 3921 The parallel matrix is partitioned such that the first m0 rows belong to 3922 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3923 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
3924 3925 The DIAGONAL portion of the local submatrix of a processor can be defined 3926 as the submatrix which is obtained by extracting the part corresponding to 3927 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3928 first row that belongs to the processor, r2 is the last row belonging to 3929 this processor, and c1-c2 is the range of indices of the local part of a 3930 vector suitable for applying the matrix to. This is an mxn matrix. In the 3931 common case of a square matrix, the row and column ranges are the same and 3932 the DIAGONAL part is also square. The remaining portion of the local 3933 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3934 3935 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3936 3937 You can call MatGetInfo() to get information on how effective the preallocation was; 3938 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3939 You can also run with the option -info and look for messages with the string 3940 malloc in them to see if additional memory allocation was needed. 3941 3942 Example usage: 3943 3944 Consider the following 8x8 matrix with 34 non-zero values, that is 3945 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3946 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3947 as follows: 3948 3949 .vb 3950 1 2 0 | 0 3 0 | 0 4 3951 Proc0 0 5 6 | 7 0 0 | 8 0 3952 9 0 10 | 11 0 0 | 12 0 3953 ------------------------------------- 3954 13 0 14 | 15 16 17 | 0 0 3955 Proc1 0 18 0 | 19 20 21 | 0 0 3956 0 0 0 | 22 23 0 | 24 0 3957 ------------------------------------- 3958 Proc2 25 26 27 | 0 0 28 | 29 0 3959 30 0 0 | 31 32 33 | 0 34 3960 .ve 3961 3962 This can be represented as a collection of submatrices as: 3963 3964 .vb 3965 A B C 3966 D E F 3967 G H I 3968 .ve 3969 3970 Where the submatrices A,B,C are owned by proc0, D,E,F are 3971 owned by proc1, G,H,I are owned by proc2. 
3972 3973 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3974 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3975 The 'M','N' parameters are 8,8, and have the same values on all procs. 3976 3977 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3978 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3979 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3980 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3981 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3982 matrix, and [DF] as another SeqAIJ matrix. 3983 3984 When d_nz, o_nz parameters are specified, d_nz storage elements are 3985 allocated for every row of the local diagonal submatrix, and o_nz 3986 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3987 One way to choose d_nz and o_nz is to use the max nonzeros per local 3988 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3989 In this case, the values of d_nz,o_nz are: 3990 .vb 3991 proc0 : d_nz = 2, o_nz = 2 3992 proc1 : d_nz = 3, o_nz = 2 3993 proc2 : d_nz = 1, o_nz = 4 3994 .ve 3995 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3996 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3997 for proc2. i.e., we are using 12+15+10=37 storage locations to store 3998 34 values. 3999 4000 When d_nnz, o_nnz parameters are specified, the storage is specified 4001 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4002 In the above case the values for d_nnz,o_nnz are: 4003 .vb 4004 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4005 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4006 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4007 .ve 4008 Here the space allocated is the sum of all the above values, i.e., 34, and 4009 hence pre-allocation is perfect. 
4010 4011 Level: intermediate 4012 4013 .keywords: matrix, aij, compressed row, sparse, parallel 4014 4015 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4016 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4017 @*/ 4018 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4019 { 4020 PetscErrorCode ierr; 4021 4022 PetscFunctionBegin; 4023 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4024 PetscValidType(B,1); 4025 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4026 PetscFunctionReturn(0); 4027 } 4028 4029 /*@ 4030 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4031 CSR format the local rows. 4032 4033 Collective on MPI_Comm 4034 4035 Input Parameters: 4036 + comm - MPI communicator 4037 . m - number of local rows (Cannot be PETSC_DECIDE) 4038 . n - This value should be the same as the local size used in creating the 4039 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4040 calculated if N is given) For square matrices n is almost always m. 4041 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4042 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4043 . i - row indices 4044 . j - column indices 4045 - a - matrix values 4046 4047 Output Parameter: 4048 . mat - the matrix 4049 4050 Level: intermediate 4051 4052 Notes: 4053 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4054 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4055 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4056 4057 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
4058 4059 The format which is used for the sparse matrix input, is equivalent to a 4060 row-major ordering.. i.e for the following matrix, the input data expected is 4061 as shown 4062 4063 $ 1 0 0 4064 $ 2 0 3 P0 4065 $ ------- 4066 $ 4 5 6 P1 4067 $ 4068 $ Process0 [P0]: rows_owned=[0,1] 4069 $ i = {0,1,3} [size = nrow+1 = 2+1] 4070 $ j = {0,0,2} [size = 3] 4071 $ v = {1,2,3} [size = 3] 4072 $ 4073 $ Process1 [P1]: rows_owned=[2] 4074 $ i = {0,3} [size = nrow+1 = 1+1] 4075 $ j = {0,1,2} [size = 3] 4076 $ v = {4,5,6} [size = 3] 4077 4078 .keywords: matrix, aij, compressed row, sparse, parallel 4079 4080 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4081 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4082 @*/ 4083 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4084 { 4085 PetscErrorCode ierr; 4086 4087 PetscFunctionBegin; 4088 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4089 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4090 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4091 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4092 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4093 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4094 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4095 PetscFunctionReturn(0); 4096 } 4097 4098 /*@C 4099 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4100 (the default parallel PETSc format). For good matrix assembly performance 4101 the user should preallocate the matrix storage by setting the parameters 4102 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4103 performance can be increased by more than a factor of 50. 
4104 4105 Collective on MPI_Comm 4106 4107 Input Parameters: 4108 + comm - MPI communicator 4109 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4110 This value should be the same as the local size used in creating the 4111 y vector for the matrix-vector product y = Ax. 4112 . n - This value should be the same as the local size used in creating the 4113 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4114 calculated if N is given) For square matrices n is almost always m. 4115 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4116 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4117 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4118 (same value is used for all local rows) 4119 . d_nnz - array containing the number of nonzeros in the various rows of the 4120 DIAGONAL portion of the local submatrix (possibly different for each row) 4121 or NULL, if d_nz is used to specify the nonzero structure. 4122 The size of this array is equal to the number of local rows, i.e 'm'. 4123 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4124 submatrix (same value is used for all local rows). 4125 - o_nnz - array containing the number of nonzeros in the various rows of the 4126 OFF-DIAGONAL portion of the local submatrix (possibly different for 4127 each row) or NULL, if o_nz is used to specify the nonzero 4128 structure. The size of this array is equal to the number 4129 of local rows, i.e 'm'. 4130 4131 Output Parameter: 4132 . A - the matrix 4133 4134 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4135 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve

$     MatCreate(...,&A);
$     MatSetType(A,MATMPIAIJ);
$     MatSetSizes(A, m,n,M,N);
$     MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
4269 4270 Level: intermediate 4271 4272 .keywords: matrix, aij, compressed row, sparse, parallel 4273 4274 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4275 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4276 @*/ 4277 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4278 { 4279 PetscErrorCode ierr; 4280 PetscMPIInt size; 4281 4282 PetscFunctionBegin; 4283 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4284 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4285 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4286 if (size > 1) { 4287 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4288 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4289 } else { 4290 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4291 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4292 } 4293 PetscFunctionReturn(0); 4294 } 4295 4296 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4297 { 4298 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4299 PetscBool flg; 4300 PetscErrorCode ierr; 4301 4302 PetscFunctionBegin; 4303 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4304 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4305 if (Ad) *Ad = a->A; 4306 if (Ao) *Ao = a->B; 4307 if (colmap) *colmap = a->garray; 4308 PetscFunctionReturn(0); 4309 } 4310 4311 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4312 { 4313 PetscErrorCode ierr; 4314 PetscInt m,N,i,rstart,nnz,Ii; 4315 PetscInt *indx; 4316 PetscScalar *values; 4317 4318 PetscFunctionBegin; 4319 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4320 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4321 PetscInt *dnz,*onz,sum,bs,cbs; 4322 4323 if (n 
== PETSC_DECIDE) { 4324 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4325 } 4326 /* Check sum(n) = N */ 4327 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4328 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4329 4330 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4331 rstart -= m; 4332 4333 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4334 for (i=0; i<m; i++) { 4335 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4336 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4337 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4338 } 4339 4340 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4341 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4342 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4343 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4344 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4345 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4346 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4347 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4348 } 4349 4350 /* numeric phase */ 4351 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4352 for (i=0; i<m; i++) { 4353 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4354 Ii = i + rstart; 4355 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4356 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4357 } 4358 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4359 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4360 PetscFunctionReturn(0); 4361 } 4362 4363 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4364 { 4365 PetscErrorCode ierr; 4366 PetscMPIInt rank; 4367 PetscInt m,N,i,rstart,nnz; 4368 size_t len; 4369 const PetscInt 
*indx; 4370 PetscViewer out; 4371 char *name; 4372 Mat B; 4373 const PetscScalar *values; 4374 4375 PetscFunctionBegin; 4376 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4377 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4378 /* Should this be the type of the diagonal block of A? */ 4379 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4380 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4381 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4382 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4383 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4384 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4385 for (i=0; i<m; i++) { 4386 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4387 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4388 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4389 } 4390 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4391 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4392 4393 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4394 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4395 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4396 sprintf(name,"%s.%d",outfile,rank); 4397 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4398 ierr = PetscFree(name);CHKERRQ(ierr); 4399 ierr = MatView(B,out);CHKERRQ(ierr); 4400 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4401 ierr = MatDestroy(&B);CHKERRQ(ierr); 4402 PetscFunctionReturn(0); 4403 } 4404 4405 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4406 { 4407 PetscErrorCode ierr; 4408 Mat_Merge_SeqsToMPI *merge; 4409 PetscContainer container; 4410 4411 PetscFunctionBegin; 4412 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4413 if (container) { 4414 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4415 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4416 ierr = 
PetscFree(merge->len_s);CHKERRQ(ierr); 4417 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4418 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4419 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4420 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4421 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4422 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4423 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4424 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4425 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4426 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4427 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4428 ierr = PetscFree(merge);CHKERRQ(ierr); 4429 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4430 } 4431 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4432 PetscFunctionReturn(0); 4433 } 4434 4435 #include <../src/mat/utils/freespace.h> 4436 #include <petscbt.h> 4437 4438 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4439 { 4440 PetscErrorCode ierr; 4441 MPI_Comm comm; 4442 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4443 PetscMPIInt size,rank,taga,*len_s; 4444 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4445 PetscInt proc,m; 4446 PetscInt **buf_ri,**buf_rj; 4447 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4448 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4449 MPI_Request *s_waits,*r_waits; 4450 MPI_Status *status; 4451 MatScalar *aa=a->a; 4452 MatScalar **abuf_r,*ba_i; 4453 Mat_Merge_SeqsToMPI *merge; 4454 PetscContainer container; 4455 4456 PetscFunctionBegin; 4457 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4458 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4459 4460 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4461 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4462 4463 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4464 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4465 4466 bi = merge->bi; 4467 bj = merge->bj; 4468 buf_ri = merge->buf_ri; 4469 buf_rj = merge->buf_rj; 4470 4471 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4472 owners = merge->rowmap->range; 4473 len_s = merge->len_s; 4474 4475 /* send and recv matrix values */ 4476 /*-----------------------------*/ 4477 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4478 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4479 4480 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4481 for (proc=0,k=0; proc<size; proc++) { 4482 if (!len_s[proc]) continue; 4483 i = owners[proc]; 4484 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4485 k++; 4486 } 4487 4488 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4489 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4490 ierr = PetscFree(status);CHKERRQ(ierr); 4491 4492 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4493 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4494 4495 /* insert mat values of mpimat */ 4496 /*----------------------------*/ 4497 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4498 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4499 4500 for (k=0; k<merge->nrecv; k++) { 4501 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4502 nrows = *(buf_ri_k[k]); 4503 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4504 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4505 } 4506 4507 /* set values of ba */ 4508 m = merge->rowmap->n; 4509 for (i=0; i<m; i++) { 4510 arow = owners[rank] + i; 4511 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4512 bnzi = bi[i+1] - bi[i]; 4513 ierr = 
PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4514 4515 /* add local non-zero vals of this proc's seqmat into ba */ 4516 anzi = ai[arow+1] - ai[arow]; 4517 aj = a->j + ai[arow]; 4518 aa = a->a + ai[arow]; 4519 nextaj = 0; 4520 for (j=0; nextaj<anzi; j++) { 4521 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4522 ba_i[j] += aa[nextaj++]; 4523 } 4524 } 4525 4526 /* add received vals into ba */ 4527 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4528 /* i-th row */ 4529 if (i == *nextrow[k]) { 4530 anzi = *(nextai[k]+1) - *nextai[k]; 4531 aj = buf_rj[k] + *(nextai[k]); 4532 aa = abuf_r[k] + *(nextai[k]); 4533 nextaj = 0; 4534 for (j=0; nextaj<anzi; j++) { 4535 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4536 ba_i[j] += aa[nextaj++]; 4537 } 4538 } 4539 nextrow[k]++; nextai[k]++; 4540 } 4541 } 4542 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4543 } 4544 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4545 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4546 4547 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4548 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4549 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4550 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4551 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4552 PetscFunctionReturn(0); 4553 } 4554 4555 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4556 { 4557 PetscErrorCode ierr; 4558 Mat B_mpi; 4559 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4560 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4561 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4562 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4563 PetscInt len,proc,*dnz,*onz,bs,cbs; 4564 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4565 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4566 MPI_Request 
*si_waits,*sj_waits,*ri_waits,*rj_waits; 4567 MPI_Status *status; 4568 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4569 PetscBT lnkbt; 4570 Mat_Merge_SeqsToMPI *merge; 4571 PetscContainer container; 4572 4573 PetscFunctionBegin; 4574 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4575 4576 /* make sure it is a PETSc comm */ 4577 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4578 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4579 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4580 4581 ierr = PetscNew(&merge);CHKERRQ(ierr); 4582 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4583 4584 /* determine row ownership */ 4585 /*---------------------------------------------------------*/ 4586 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4587 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4588 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4589 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4590 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4591 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4592 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4593 4594 m = merge->rowmap->n; 4595 owners = merge->rowmap->range; 4596 4597 /* determine the number of messages to send, their lengths */ 4598 /*---------------------------------------------------------*/ 4599 len_s = merge->len_s; 4600 4601 len = 0; /* length of buf_si[] */ 4602 merge->nsend = 0; 4603 for (proc=0; proc<size; proc++) { 4604 len_si[proc] = 0; 4605 if (proc == rank) { 4606 len_s[proc] = 0; 4607 } else { 4608 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4609 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4610 } 4611 if (len_s[proc]) { 4612 merge->nsend++; 4613 nrows = 0; 4614 for (i=owners[proc]; i<owners[proc+1]; i++) { 4615 if (ai[i+1] > ai[i]) nrows++; 4616 } 4617 len_si[proc] = 2*(nrows+1); 4618 len += len_si[proc]; 4619 } 4620 } 4621 4622 
/* determine the number and length of messages to receive for ij-structure */ 4623 /*-------------------------------------------------------------------------*/ 4624 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4625 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4626 4627 /* post the Irecv of j-structure */ 4628 /*-------------------------------*/ 4629 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4630 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4631 4632 /* post the Isend of j-structure */ 4633 /*--------------------------------*/ 4634 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4635 4636 for (proc=0, k=0; proc<size; proc++) { 4637 if (!len_s[proc]) continue; 4638 i = owners[proc]; 4639 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4640 k++; 4641 } 4642 4643 /* receives and sends of j-structure are complete */ 4644 /*------------------------------------------------*/ 4645 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4646 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4647 4648 /* send and recv i-structure */ 4649 /*---------------------------*/ 4650 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4651 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4652 4653 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4654 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4655 for (proc=0,k=0; proc<size; proc++) { 4656 if (!len_s[proc]) continue; 4657 /* form outgoing message for i-structure: 4658 buf_si[0]: nrows to be sent 4659 [1:nrows]: row index (global) 4660 [nrows+1:2*nrows+1]: i-structure index 4661 */ 4662 /*-------------------------------------------*/ 
4663 nrows = len_si[proc]/2 - 1; 4664 buf_si_i = buf_si + nrows+1; 4665 buf_si[0] = nrows; 4666 buf_si_i[0] = 0; 4667 nrows = 0; 4668 for (i=owners[proc]; i<owners[proc+1]; i++) { 4669 anzi = ai[i+1] - ai[i]; 4670 if (anzi) { 4671 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4672 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4673 nrows++; 4674 } 4675 } 4676 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4677 k++; 4678 buf_si += len_si[proc]; 4679 } 4680 4681 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4682 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4683 4684 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4685 for (i=0; i<merge->nrecv; i++) { 4686 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4687 } 4688 4689 ierr = PetscFree(len_si);CHKERRQ(ierr); 4690 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4691 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4692 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4693 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4694 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4695 ierr = PetscFree(status);CHKERRQ(ierr); 4696 4697 /* compute a local seq matrix in each processor */ 4698 /*----------------------------------------------*/ 4699 /* allocate bi array and free space for accumulating nonzero column info */ 4700 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4701 bi[0] = 0; 4702 4703 /* create and initialize a linked list */ 4704 nlnk = N+1; 4705 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4706 4707 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4708 len = ai[owners[rank+1]] - ai[owners[rank]]; 4709 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4710 4711 current_space = free_space; 4712 4713 /* determine symbolic info for each 
local row */ 4714 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4715 4716 for (k=0; k<merge->nrecv; k++) { 4717 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4718 nrows = *buf_ri_k[k]; 4719 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4720 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4721 } 4722 4723 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4724 len = 0; 4725 for (i=0; i<m; i++) { 4726 bnzi = 0; 4727 /* add local non-zero cols of this proc's seqmat into lnk */ 4728 arow = owners[rank] + i; 4729 anzi = ai[arow+1] - ai[arow]; 4730 aj = a->j + ai[arow]; 4731 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4732 bnzi += nlnk; 4733 /* add received col data into lnk */ 4734 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4735 if (i == *nextrow[k]) { /* i-th row */ 4736 anzi = *(nextai[k]+1) - *nextai[k]; 4737 aj = buf_rj[k] + *nextai[k]; 4738 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4739 bnzi += nlnk; 4740 nextrow[k]++; nextai[k]++; 4741 } 4742 } 4743 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4744 4745 /* if free space is not available, make more free space */ 4746 if (current_space->local_remaining<bnzi) { 4747 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4748 nspacedouble++; 4749 } 4750 /* copy data into free space, then initialize lnk */ 4751 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4752 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4753 4754 current_space->array += bnzi; 4755 current_space->local_used += bnzi; 4756 current_space->local_remaining -= bnzi; 4757 4758 bi[i+1] = bi[i] + bnzi; 4759 } 4760 4761 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4762 4763 ierr = 
PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4764 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4765 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4766 4767 /* create symbolic parallel matrix B_mpi */ 4768 /*---------------------------------------*/ 4769 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4770 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4771 if (n==PETSC_DECIDE) { 4772 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4773 } else { 4774 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4775 } 4776 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4777 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4778 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4779 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4780 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4781 4782 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4783 B_mpi->assembled = PETSC_FALSE; 4784 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4785 merge->bi = bi; 4786 merge->bj = bj; 4787 merge->buf_ri = buf_ri; 4788 merge->buf_rj = buf_rj; 4789 merge->coi = NULL; 4790 merge->coj = NULL; 4791 merge->owners_co = NULL; 4792 4793 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4794 4795 /* attach the supporting struct to B_mpi for reuse */ 4796 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4797 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4798 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4799 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4800 *mpimat = B_mpi; 4801 4802 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4803 PetscFunctionReturn(0); 4804 } 4805 4806 /*@C 4807 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4808 matrices from each processor 4809 4810 
Collective on MPI_Comm 4811 4812 Input Parameters: 4813 + comm - the communicators the parallel matrix will live on 4814 . seqmat - the input sequential matrices 4815 . m - number of local rows (or PETSC_DECIDE) 4816 . n - number of local columns (or PETSC_DECIDE) 4817 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4818 4819 Output Parameter: 4820 . mpimat - the parallel matrix generated 4821 4822 Level: advanced 4823 4824 Notes: 4825 The dimensions of the sequential matrix in each processor MUST be the same. 4826 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4827 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4828 @*/ 4829 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4830 { 4831 PetscErrorCode ierr; 4832 PetscMPIInt size; 4833 4834 PetscFunctionBegin; 4835 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4836 if (size == 1) { 4837 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4838 if (scall == MAT_INITIAL_MATRIX) { 4839 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4840 } else { 4841 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4842 } 4843 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4844 PetscFunctionReturn(0); 4845 } 4846 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4847 if (scall == MAT_INITIAL_MATRIX) { 4848 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4849 } 4850 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4851 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4852 PetscFunctionReturn(0); 4853 } 4854 4855 /*@ 4856 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4857 mlocal rows and n columns. 
Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4858 with MatGetSize() 4859 4860 Not Collective 4861 4862 Input Parameters: 4863 + A - the matrix 4864 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4865 4866 Output Parameter: 4867 . A_loc - the local sequential matrix generated 4868 4869 Level: developer 4870 4871 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4872 4873 @*/ 4874 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4875 { 4876 PetscErrorCode ierr; 4877 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4878 Mat_SeqAIJ *mat,*a,*b; 4879 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4880 MatScalar *aa,*ba,*cam; 4881 PetscScalar *ca; 4882 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4883 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4884 PetscBool match; 4885 MPI_Comm comm; 4886 PetscMPIInt size; 4887 4888 PetscFunctionBegin; 4889 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4890 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4891 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4892 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4893 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4894 4895 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4896 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4897 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4898 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4899 aa = a->a; ba = b->a; 4900 if (scall == MAT_INITIAL_MATRIX) { 4901 if (size == 1) { 4902 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4903 PetscFunctionReturn(0); 4904 } 4905 4906 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4907 ci[0] = 0; 4908 for (i=0; i<am; i++) { 4909 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4910 } 4911 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4912 ierr = 
PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4913 k = 0; 4914 for (i=0; i<am; i++) { 4915 ncols_o = bi[i+1] - bi[i]; 4916 ncols_d = ai[i+1] - ai[i]; 4917 /* off-diagonal portion of A */ 4918 for (jo=0; jo<ncols_o; jo++) { 4919 col = cmap[*bj]; 4920 if (col >= cstart) break; 4921 cj[k] = col; bj++; 4922 ca[k++] = *ba++; 4923 } 4924 /* diagonal portion of A */ 4925 for (j=0; j<ncols_d; j++) { 4926 cj[k] = cstart + *aj++; 4927 ca[k++] = *aa++; 4928 } 4929 /* off-diagonal portion of A */ 4930 for (j=jo; j<ncols_o; j++) { 4931 cj[k] = cmap[*bj++]; 4932 ca[k++] = *ba++; 4933 } 4934 } 4935 /* put together the new matrix */ 4936 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4937 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4938 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4939 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4940 mat->free_a = PETSC_TRUE; 4941 mat->free_ij = PETSC_TRUE; 4942 mat->nonew = 0; 4943 } else if (scall == MAT_REUSE_MATRIX) { 4944 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4945 ci = mat->i; cj = mat->j; cam = mat->a; 4946 for (i=0; i<am; i++) { 4947 /* off-diagonal portion of A */ 4948 ncols_o = bi[i+1] - bi[i]; 4949 for (jo=0; jo<ncols_o; jo++) { 4950 col = cmap[*bj]; 4951 if (col >= cstart) break; 4952 *cam++ = *ba++; bj++; 4953 } 4954 /* diagonal portion of A */ 4955 ncols_d = ai[i+1] - ai[i]; 4956 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4957 /* off-diagonal portion of A */ 4958 for (j=jo; j<ncols_o; j++) { 4959 *cam++ = *ba++; bj++; 4960 } 4961 } 4962 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4963 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4964 PetscFunctionReturn(0); 4965 } 4966 4967 /*@C 4968 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4969 4970 Not Collective 4971 4972 Input Parameters: 4973 + A - 
the matrix 4974 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4975 - row, col - index sets of rows and columns to extract (or NULL) 4976 4977 Output Parameter: 4978 . A_loc - the local sequential matrix generated 4979 4980 Level: developer 4981 4982 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4983 4984 @*/ 4985 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4986 { 4987 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4988 PetscErrorCode ierr; 4989 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4990 IS isrowa,iscola; 4991 Mat *aloc; 4992 PetscBool match; 4993 4994 PetscFunctionBegin; 4995 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4996 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4997 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4998 if (!row) { 4999 start = A->rmap->rstart; end = A->rmap->rend; 5000 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5001 } else { 5002 isrowa = *row; 5003 } 5004 if (!col) { 5005 start = A->cmap->rstart; 5006 cmap = a->garray; 5007 nzA = a->A->cmap->n; 5008 nzB = a->B->cmap->n; 5009 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5010 ncols = 0; 5011 for (i=0; i<nzB; i++) { 5012 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5013 else break; 5014 } 5015 imark = i; 5016 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5017 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5018 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5019 } else { 5020 iscola = *col; 5021 } 5022 if (scall != MAT_INITIAL_MATRIX) { 5023 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5024 aloc[0] = *A_loc; 5025 } 5026 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5027 *A_loc = aloc[0]; 5028 ierr = PetscFree(aloc);CHKERRQ(ierr); 5029 if (!row) { 5030 ierr = 
ISDestroy(&isrowa);CHKERRQ(ierr); 5031 } 5032 if (!col) { 5033 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5034 } 5035 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5036 PetscFunctionReturn(0); 5037 } 5038 5039 /*@C 5040 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5041 5042 Collective on Mat 5043 5044 Input Parameters: 5045 + A,B - the matrices in mpiaij format 5046 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5047 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5048 5049 Output Parameter: 5050 + rowb, colb - index sets of rows and columns of B to extract 5051 - B_seq - the sequential matrix generated 5052 5053 Level: developer 5054 5055 @*/ 5056 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5057 { 5058 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5059 PetscErrorCode ierr; 5060 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5061 IS isrowb,iscolb; 5062 Mat *bseq=NULL; 5063 5064 PetscFunctionBegin; 5065 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5066 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5067 } 5068 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5069 5070 if (scall == MAT_INITIAL_MATRIX) { 5071 start = A->cmap->rstart; 5072 cmap = a->garray; 5073 nzA = a->A->cmap->n; 5074 nzB = a->B->cmap->n; 5075 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5076 ncols = 0; 5077 for (i=0; i<nzB; i++) { /* row < local row index */ 5078 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5079 else break; 5080 } 5081 imark = i; 5082 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5083 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5084 ierr = 
ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5085 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5086 } else { 5087 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5088 isrowb = *rowb; iscolb = *colb; 5089 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5090 bseq[0] = *B_seq; 5091 } 5092 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5093 *B_seq = bseq[0]; 5094 ierr = PetscFree(bseq);CHKERRQ(ierr); 5095 if (!rowb) { 5096 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5097 } else { 5098 *rowb = isrowb; 5099 } 5100 if (!colb) { 5101 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5102 } else { 5103 *colb = iscolb; 5104 } 5105 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5106 PetscFunctionReturn(0); 5107 } 5108 5109 /* 5110 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5111 of the OFF-DIAGONAL portion of local A 5112 5113 Collective on Mat 5114 5115 Input Parameters: 5116 + A,B - the matrices in mpiaij format 5117 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5118 5119 Output Parameter: 5120 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5121 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5122 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5123 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5124 5125 Level: developer 5126 5127 */ 5128 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5129 { 5130 VecScatter_MPI_General *gen_to,*gen_from; 5131 PetscErrorCode ierr; 5132 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5133 Mat_SeqAIJ *b_oth; 5134 VecScatter ctx; 5135 MPI_Comm comm; 5136 PetscMPIInt *rprocs,*sprocs,tag,rank; 5137 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5138 PetscInt *rvalues,*svalues; 5139 MatScalar *b_otha,*bufa,*bufA; 5140 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5141 MPI_Request *rwaits = NULL,*swaits = NULL; 5142 MPI_Status *sstatus,rstatus; 5143 PetscMPIInt jj,size; 5144 PetscInt *cols,sbs,rbs; 5145 PetscScalar *vals; 5146 5147 PetscFunctionBegin; 5148 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5149 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5150 5151 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5152 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5153 } 5154 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5155 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5156 5157 if (size == 1) { 5158 startsj_s = NULL; 5159 bufa_ptr = NULL; 5160 *B_oth = NULL; 5161 PetscFunctionReturn(0); 5162 } 5163 5164 if (!a->Mvctx_mpi1) { /* create a->Mvctx_mpi1 to be used for Mat-Mat ops */ 5165 a->Mvctx_mpi1_flg = PETSC_TRUE; 5166 ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr); 5167 } 5168 ctx = a->Mvctx_mpi1; 5169 tag = ((PetscObject)ctx)->tag; 5170 5171 gen_to = (VecScatter_MPI_General*)ctx->todata; 5172 gen_from = 
(VecScatter_MPI_General*)ctx->fromdata; 5173 nrecvs = gen_from->n; 5174 nsends = gen_to->n; 5175 5176 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5177 srow = gen_to->indices; /* local row index to be sent */ 5178 sstarts = gen_to->starts; 5179 sprocs = gen_to->procs; 5180 sstatus = gen_to->sstatus; 5181 sbs = gen_to->bs; 5182 rstarts = gen_from->starts; 5183 rprocs = gen_from->procs; 5184 rbs = gen_from->bs; 5185 5186 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5187 if (scall == MAT_INITIAL_MATRIX) { 5188 /* i-array */ 5189 /*---------*/ 5190 /* post receives */ 5191 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5192 for (i=0; i<nrecvs; i++) { 5193 rowlen = rvalues + rstarts[i]*rbs; 5194 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5195 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5196 } 5197 5198 /* pack the outgoing message */ 5199 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5200 5201 sstartsj[0] = 0; 5202 rstartsj[0] = 0; 5203 len = 0; /* total length of j or a array to be sent */ 5204 k = 0; 5205 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5206 for (i=0; i<nsends; i++) { 5207 rowlen = svalues + sstarts[i]*sbs; 5208 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5209 for (j=0; j<nrows; j++) { 5210 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5211 for (l=0; l<sbs; l++) { 5212 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5213 5214 rowlen[j*sbs+l] = ncols; 5215 5216 len += ncols; 5217 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5218 } 5219 k++; 5220 } 5221 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5222 5223 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5224 } 5225 /* recvs and sends of i-array are 
completed */ 5226 i = nrecvs; 5227 while (i--) { 5228 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5229 } 5230 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5231 ierr = PetscFree(svalues);CHKERRQ(ierr); 5232 5233 /* allocate buffers for sending j and a arrays */ 5234 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5235 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5236 5237 /* create i-array of B_oth */ 5238 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5239 5240 b_othi[0] = 0; 5241 len = 0; /* total length of j or a array to be received */ 5242 k = 0; 5243 for (i=0; i<nrecvs; i++) { 5244 rowlen = rvalues + rstarts[i]*rbs; 5245 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5246 for (j=0; j<nrows; j++) { 5247 b_othi[k+1] = b_othi[k] + rowlen[j]; 5248 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5249 k++; 5250 } 5251 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5252 } 5253 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5254 5255 /* allocate space for j and a arrrays of B_oth */ 5256 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5257 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5258 5259 /* j-array */ 5260 /*---------*/ 5261 /* post receives of j-array */ 5262 for (i=0; i<nrecvs; i++) { 5263 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5264 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5265 } 5266 5267 /* pack the outgoing message j-array */ 5268 k = 0; 5269 for (i=0; i<nsends; i++) { 5270 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5271 bufJ = bufj+sstartsj[i]; 5272 for (j=0; j<nrows; j++) { 5273 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5274 for (ll=0; ll<sbs; ll++) { 5275 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5276 for (l=0; l<ncols; l++) { 5277 *bufJ++ = cols[l]; 5278 } 5279 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5280 } 5281 } 5282 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5283 } 5284 5285 /* recvs and sends of j-array are completed */ 5286 i = nrecvs; 5287 while (i--) { 5288 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5289 } 5290 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5291 } else if (scall == MAT_REUSE_MATRIX) { 5292 sstartsj = *startsj_s; 5293 rstartsj = *startsj_r; 5294 bufa = *bufa_ptr; 5295 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5296 b_otha = b_oth->a; 5297 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5298 5299 /* a-array */ 5300 /*---------*/ 5301 /* post receives of a-array */ 5302 for (i=0; i<nrecvs; i++) { 5303 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5304 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5305 } 5306 5307 /* pack the outgoing message a-array */ 5308 k = 0; 5309 for (i=0; i<nsends; i++) { 5310 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5311 bufA = bufa+sstartsj[i]; 5312 for (j=0; j<nrows; j++) { 5313 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5314 for (ll=0; ll<sbs; ll++) { 5315 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5316 for (l=0; l<ncols; l++) { 5317 *bufA++ = vals[l]; 5318 } 5319 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5320 } 5321 } 5322 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5323 } 5324 /* recvs and sends of a-array are completed */ 5325 i = nrecvs; 5326 while (i--) { 5327 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5328 } 5329 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5330 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 
5331 5332 if (scall == MAT_INITIAL_MATRIX) { 5333 /* put together the new matrix */ 5334 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5335 5336 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5337 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5338 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5339 b_oth->free_a = PETSC_TRUE; 5340 b_oth->free_ij = PETSC_TRUE; 5341 b_oth->nonew = 0; 5342 5343 ierr = PetscFree(bufj);CHKERRQ(ierr); 5344 if (!startsj_s || !bufa_ptr) { 5345 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5346 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5347 } else { 5348 *startsj_s = sstartsj; 5349 *startsj_r = rstartsj; 5350 *bufa_ptr = bufa; 5351 } 5352 } 5353 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5354 PetscFunctionReturn(0); 5355 } 5356 5357 /*@C 5358 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5359 5360 Not Collective 5361 5362 Input Parameters: 5363 . A - The matrix in mpiaij format 5364 5365 Output Parameter: 5366 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5367 . 
colmap - A map from global column index to local index into lvec 5368 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5369 5370 Level: developer 5371 5372 @*/ 5373 #if defined(PETSC_USE_CTABLE) 5374 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5375 #else 5376 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5377 #endif 5378 { 5379 Mat_MPIAIJ *a; 5380 5381 PetscFunctionBegin; 5382 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5383 PetscValidPointer(lvec, 2); 5384 PetscValidPointer(colmap, 3); 5385 PetscValidPointer(multScatter, 4); 5386 a = (Mat_MPIAIJ*) A->data; 5387 if (lvec) *lvec = a->lvec; 5388 if (colmap) *colmap = a->colmap; 5389 if (multScatter) *multScatter = a->Mvctx; 5390 PetscFunctionReturn(0); 5391 } 5392 5393 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5394 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5395 #if defined(PETSC_HAVE_MKL_SPARSE) 5396 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5397 #endif 5398 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5399 #if defined(PETSC_HAVE_ELEMENTAL) 5400 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5401 #endif 5402 #if defined(PETSC_HAVE_HYPRE) 5403 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5404 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5405 #endif 5406 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*); 5407 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5408 5409 /* 5410 Computes (B'*A')' since computing B*A directly is untenable 5411 5412 n p p 5413 ( ) ( ) ( ) 5414 m ( A ) * n ( B ) = m ( C ) 5415 ( ) ( ) ( ) 5416 5417 */ 
5418 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5419 { 5420 PetscErrorCode ierr; 5421 Mat At,Bt,Ct; 5422 5423 PetscFunctionBegin; 5424 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5425 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5426 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5427 ierr = MatDestroy(&At);CHKERRQ(ierr); 5428 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5429 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5430 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5431 PetscFunctionReturn(0); 5432 } 5433 5434 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5435 { 5436 PetscErrorCode ierr; 5437 PetscInt m=A->rmap->n,n=B->cmap->n; 5438 Mat Cmat; 5439 5440 PetscFunctionBegin; 5441 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5442 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5443 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5444 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5445 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5446 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5447 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5448 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5449 5450 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5451 5452 *C = Cmat; 5453 PetscFunctionReturn(0); 5454 } 5455 5456 /* ----------------------------------------------------------------*/ 5457 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5458 { 5459 PetscErrorCode ierr; 5460 5461 PetscFunctionBegin; 5462 if (scall == MAT_INITIAL_MATRIX) { 5463 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5464 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5465 ierr = 
PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5466 } 5467 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5468 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5469 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5470 PetscFunctionReturn(0); 5471 } 5472 5473 /*MC 5474 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5475 5476 Options Database Keys: 5477 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5478 5479 Level: beginner 5480 5481 .seealso: MatCreateAIJ() 5482 M*/ 5483 5484 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5485 { 5486 Mat_MPIAIJ *b; 5487 PetscErrorCode ierr; 5488 PetscMPIInt size; 5489 5490 PetscFunctionBegin; 5491 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5492 5493 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5494 B->data = (void*)b; 5495 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5496 B->assembled = PETSC_FALSE; 5497 B->insertmode = NOT_SET_VALUES; 5498 b->size = size; 5499 5500 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5501 5502 /* build cache for off array entries formed */ 5503 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5504 5505 b->donotstash = PETSC_FALSE; 5506 b->colmap = 0; 5507 b->garray = 0; 5508 b->roworiented = PETSC_TRUE; 5509 5510 /* stuff used for matrix vector multiply */ 5511 b->lvec = NULL; 5512 b->Mvctx = NULL; 5513 5514 /* stuff for MatGetRow() */ 5515 b->rowindices = 0; 5516 b->rowvalues = 0; 5517 b->getrowactive = PETSC_FALSE; 5518 5519 /* flexible pointer used in CUSP/CUSPARSE classes */ 5520 b->spptr = NULL; 5521 5522 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5523 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5524 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5525 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5526 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5527 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5528 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5529 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5530 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5531 #if defined(PETSC_HAVE_MKL_SPARSE) 5532 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5533 #endif 5534 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5535 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5536 #if defined(PETSC_HAVE_ELEMENTAL) 5537 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5538 #endif 5539 #if defined(PETSC_HAVE_HYPRE) 5540 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5541 #endif 5542 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr); 5543 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5544 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5545 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5546 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5547 #if defined(PETSC_HAVE_HYPRE) 5548 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5549 #endif 5550 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5551 PetscFunctionReturn(0); 5552 } 5553 5554 /*@C 5555 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5556 and "off-diagonal" part of the matrix in CSR format. 5557 5558 Collective on MPI_Comm 5559 5560 Input Parameters: 5561 + comm - MPI communicator 5562 . m - number of local rows (Cannot be PETSC_DECIDE) 5563 . n - This value should be the same as the local size used in creating the 5564 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5565 calculated if N is given) For square matrices n is almost always m. 5566 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5567 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5568 . i - row indices for "diagonal" portion of matrix 5569 . j - column indices 5570 . a - matrix values 5571 . oi - row indices for "off-diagonal" portion of matrix 5572 . oj - column indices 5573 - oa - matrix values 5574 5575 Output Parameter: 5576 . mat - the matrix 5577 5578 Level: advanced 5579 5580 Notes: 5581 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. 
The user 5582 must free the arrays once the matrix has been destroyed and not before. 5583 5584 The i and j indices are 0 based 5585 5586 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5587 5588 This sets local rows and cannot be used to set off-processor values. 5589 5590 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5591 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5592 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5593 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5594 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5595 communication if it is known that only local entries will be set. 5596 5597 .keywords: matrix, aij, compressed row, sparse, parallel 5598 5599 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5600 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5601 @*/ 5602 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5603 { 5604 PetscErrorCode ierr; 5605 Mat_MPIAIJ *maij; 5606 5607 PetscFunctionBegin; 5608 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5609 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5610 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5611 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5612 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5613 ierr = 
MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5614 maij = (Mat_MPIAIJ*) (*mat)->data; 5615 5616 (*mat)->preallocated = PETSC_TRUE; 5617 5618 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5619 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5620 5621 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5622 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5623 5624 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5625 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5626 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5627 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5628 5629 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5630 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5631 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5632 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5633 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5634 PetscFunctionReturn(0); 5635 } 5636 5637 /* 5638 Special version for direct calls from Fortran 5639 */ 5640 #include <petsc/private/fortranimpl.h> 5641 5642 /* Change these macros so can be used in void function */ 5643 #undef CHKERRQ 5644 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5645 #undef SETERRQ2 5646 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5647 #undef SETERRQ3 5648 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5649 #undef SETERRQ 5650 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5651 5652 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5653 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5654 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5655 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5656 #else 5657 #endif 5658 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt 
im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5659 { 5660 Mat mat = *mmat; 5661 PetscInt m = *mm, n = *mn; 5662 InsertMode addv = *maddv; 5663 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5664 PetscScalar value; 5665 PetscErrorCode ierr; 5666 5667 MatCheckPreallocated(mat,1); 5668 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5669 5670 #if defined(PETSC_USE_DEBUG) 5671 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5672 #endif 5673 { 5674 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5675 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5676 PetscBool roworiented = aij->roworiented; 5677 5678 /* Some Variables required in the macro */ 5679 Mat A = aij->A; 5680 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5681 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5682 MatScalar *aa = a->a; 5683 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? 
PETSC_TRUE : PETSC_FALSE); 5684 Mat B = aij->B; 5685 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5686 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5687 MatScalar *ba = b->a; 5688 5689 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5690 PetscInt nonew = a->nonew; 5691 MatScalar *ap1,*ap2; 5692 5693 PetscFunctionBegin; 5694 for (i=0; i<m; i++) { 5695 if (im[i] < 0) continue; 5696 #if defined(PETSC_USE_DEBUG) 5697 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5698 #endif 5699 if (im[i] >= rstart && im[i] < rend) { 5700 row = im[i] - rstart; 5701 lastcol1 = -1; 5702 rp1 = aj + ai[row]; 5703 ap1 = aa + ai[row]; 5704 rmax1 = aimax[row]; 5705 nrow1 = ailen[row]; 5706 low1 = 0; 5707 high1 = nrow1; 5708 lastcol2 = -1; 5709 rp2 = bj + bi[row]; 5710 ap2 = ba + bi[row]; 5711 rmax2 = bimax[row]; 5712 nrow2 = bilen[row]; 5713 low2 = 0; 5714 high2 = nrow2; 5715 5716 for (j=0; j<n; j++) { 5717 if (roworiented) value = v[i*n+j]; 5718 else value = v[i+j*m]; 5719 if (in[j] >= cstart && in[j] < cend) { 5720 col = in[j] - cstart; 5721 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5722 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5723 } else if (in[j] < 0) continue; 5724 #if defined(PETSC_USE_DEBUG) 5725 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5726 #endif 5727 else { 5728 if (mat->was_assembled) { 5729 if (!aij->colmap) { 5730 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5731 } 5732 #if defined(PETSC_USE_CTABLE) 5733 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5734 col--; 5735 #else 5736 col = aij->colmap[in[j]] - 1; 5737 #endif 5738 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) 
continue; 5739 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5740 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5741 col = in[j]; 5742 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5743 B = aij->B; 5744 b = (Mat_SeqAIJ*)B->data; 5745 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5746 rp2 = bj + bi[row]; 5747 ap2 = ba + bi[row]; 5748 rmax2 = bimax[row]; 5749 nrow2 = bilen[row]; 5750 low2 = 0; 5751 high2 = nrow2; 5752 bm = aij->B->rmap->n; 5753 ba = b->a; 5754 } 5755 } else col = in[j]; 5756 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5757 } 5758 } 5759 } else if (!aij->donotstash) { 5760 if (roworiented) { 5761 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5762 } else { 5763 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5764 } 5765 } 5766 } 5767 } 5768 PetscFunctionReturnVoid(); 5769 } 5770 5771