1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 
PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 138 if (type == NORM_2) { 139 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 140 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 141 } 142 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 143 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 144 } 145 } else if (type == NORM_1) { 146 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 147 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 148 } 149 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 150 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 151 } 152 } else if (type == NORM_INFINITY) { 153 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 154 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 155 } 156 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 157 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 158 } 159 160 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 161 if (type == 
NORM_INFINITY) { 162 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 163 } else { 164 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 165 } 166 ierr = PetscFree(work);CHKERRQ(ierr); 167 if (type == NORM_2) { 168 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatDistribute_MPIAIJ" 175 /* 176 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 177 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 198 if (!rank) { 199 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 200 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 201 } 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 204 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 205 if (!rank) { 206 bses[0] = gmat->rmap->bs; 207 bses[1] = gmat->cmap->bs; 208 } 209 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 210 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 211 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 212 ierr = 
PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 213 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 214 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 215 216 rowners[0] = 0; 217 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 218 rstart = rowners[rank]; 219 rend = rowners[rank+1]; 220 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 221 if (!rank) { 222 gmata = (Mat_SeqAIJ*) gmat->data; 223 /* send row lengths to all processors */ 224 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 225 for (i=1; i<size; i++) { 226 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 227 } 228 /* determine number diagonal and off-diagonal counts */ 229 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 230 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 231 jj = 0; 232 for (i=0; i<m; i++) { 233 for (j=0; j<dlens[i]; j++) { 234 if (gmata->j[jj] < rstart) ld[i]++; 235 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 236 jj++; 237 } 238 } 239 /* send column indices to other processes */ 240 for (i=1; i<size; i++) { 241 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 242 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 243 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 244 } 245 246 /* send numerical values to other processes */ 247 for (i=1; i<size; i++) { 248 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 249 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 250 } 251 gmataa = gmata->a; 252 gmataj = gmata->j; 253 254 } else { 255 /* receive row lengths */ 256 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 257 /* receive column indices */ 258 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 260 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 /* 
determine number diagonal and off-diagonal counts */ 262 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 263 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 264 jj = 0; 265 for (i=0; i<m; i++) { 266 for (j=0; j<dlens[i]; j++) { 267 if (gmataj[jj] < rstart) ld[i]++; 268 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 269 jj++; 270 } 271 } 272 /* receive numerical values */ 273 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 274 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 275 } 276 /* set preallocation */ 277 for (i=0; i<m; i++) { 278 dlens[i] -= olens[i]; 279 } 280 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 281 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 282 283 for (i=0; i<m; i++) { 284 dlens[i] += olens[i]; 285 } 286 cnt = 0; 287 for (i=0; i<m; i++) { 288 row = rstart + i; 289 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 290 cnt += dlens[i]; 291 } 292 if (rank) { 293 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 294 } 295 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 296 ierr = PetscFree(rowners);CHKERRQ(ierr); 297 298 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 299 300 *inmat = mat; 301 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 302 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 303 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 304 mat = *inmat; 305 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 306 if (!rank) { 307 /* send numerical values to other processes */ 308 gmata = (Mat_SeqAIJ*) gmat->data; 309 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 310 gmataa = gmata->a; 311 for (i=1; i<size; i++) { 312 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 313 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 314 } 315 nz = 
gmata->i[rowners[1]]-gmata->i[rowners[0]]; 316 } else { 317 /* receive numerical values from process 0*/ 318 nz = Ad->nz + Ao->nz; 319 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 320 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 321 } 322 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 323 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 324 ad = Ad->a; 325 ao = Ao->a; 326 if (mat->rmap->n) { 327 i = 0; 328 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 329 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 330 } 331 for (i=1; i<mat->rmap->n; i++) { 332 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 333 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 334 } 335 i--; 336 if (mat->rmap->n) { 337 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 338 } 339 if (rank) { 340 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 341 } 342 } 343 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 344 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 345 PetscFunctionReturn(0); 346 } 347 348 /* 349 Local utility routine that creates a mapping from the global column 350 number to the local number in the off-diagonal part of the local 351 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 352 a slightly higher hash table cost; without it it is not scalable (each processor 353 has an order N integer array but is fast to acess. 
354 */ 355 #undef __FUNCT__ 356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 358 { 359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 360 PetscErrorCode ierr; 361 PetscInt n = aij->B->cmap->n,i; 362 363 PetscFunctionBegin; 364 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 365 #if defined(PETSC_USE_CTABLE) 366 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 367 for (i=0; i<n; i++) { 368 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 369 } 370 #else 371 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 372 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 373 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 374 #endif 375 PetscFunctionReturn(0); 376 } 377 378 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 379 { \ 380 if (col <= lastcol1) low1 = 0; \ 381 else high1 = nrow1; \ 382 lastcol1 = col;\ 383 while (high1-low1 > 5) { \ 384 t = (low1+high1)/2; \ 385 if (rp1[t] > col) high1 = t; \ 386 else low1 = t; \ 387 } \ 388 for (_i=low1; _i<high1; _i++) { \ 389 if (rp1[_i] > col) break; \ 390 if (rp1[_i] == col) { \ 391 if (addv == ADD_VALUES) ap1[_i] += value; \ 392 else ap1[_i] = value; \ 393 goto a_noinsert; \ 394 } \ 395 } \ 396 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 397 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 398 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 399 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 400 N = nrow1++ - 1; a->nz++; high1++; \ 401 /* shift up all the later entries in this row */ \ 402 for (ii=N; ii>=_i; ii--) { \ 403 rp1[ii+1] = rp1[ii]; \ 404 ap1[ii+1] = ap1[ii]; \ 405 } \ 406 rp1[_i] = col; \ 
407 ap1[_i] = value; \ 408 a_noinsert: ; \ 409 ailen[row] = nrow1; \ 410 } 411 412 413 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 414 { \ 415 if (col <= lastcol2) low2 = 0; \ 416 else high2 = nrow2; \ 417 lastcol2 = col; \ 418 while (high2-low2 > 5) { \ 419 t = (low2+high2)/2; \ 420 if (rp2[t] > col) high2 = t; \ 421 else low2 = t; \ 422 } \ 423 for (_i=low2; _i<high2; _i++) { \ 424 if (rp2[_i] > col) break; \ 425 if (rp2[_i] == col) { \ 426 if (addv == ADD_VALUES) ap2[_i] += value; \ 427 else ap2[_i] = value; \ 428 goto b_noinsert; \ 429 } \ 430 } \ 431 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 432 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 433 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 434 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 435 N = nrow2++ - 1; b->nz++; high2++; \ 436 /* shift up all the later entries in this row */ \ 437 for (ii=N; ii>=_i; ii--) { \ 438 rp2[ii+1] = rp2[ii]; \ 439 ap2[ii+1] = ap2[ii]; \ 440 } \ 441 rp2[_i] = col; \ 442 ap2[_i] = value; \ 443 b_noinsert: ; \ 444 bilen[row] = nrow2; \ 445 } 446 447 #undef __FUNCT__ 448 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 449 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 450 { 451 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 452 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 453 PetscErrorCode ierr; 454 PetscInt l,*garray = mat->garray,diag; 455 456 PetscFunctionBegin; 457 /* code only works for square matrices A */ 458 459 /* find size of row to the left of the diagonal part */ 460 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 461 row = row - diag; 462 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 463 if (garray[b->j[b->i[row]+l]] > diag) break; 464 } 465 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 
466 467 /* diagonal part */ 468 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 469 470 /* right of diagonal part */ 471 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 472 PetscFunctionReturn(0); 473 } 474 475 #undef __FUNCT__ 476 #define __FUNCT__ "MatSetValues_MPIAIJ" 477 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 478 { 479 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 480 PetscScalar value; 481 PetscErrorCode ierr; 482 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 483 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 484 PetscBool roworiented = aij->roworiented; 485 486 /* Some Variables required in the macro */ 487 Mat A = aij->A; 488 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 489 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 490 MatScalar *aa = a->a; 491 PetscBool ignorezeroentries = a->ignorezeroentries; 492 Mat B = aij->B; 493 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 494 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 495 MatScalar *ba = b->a; 496 497 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 498 PetscInt nonew; 499 MatScalar *ap1,*ap2; 500 501 PetscFunctionBegin; 502 if (v) PetscValidScalarPointer(v,6); 503 for (i=0; i<m; i++) { 504 if (im[i] < 0) continue; 505 #if defined(PETSC_USE_DEBUG) 506 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 507 #endif 508 if (im[i] >= rstart && im[i] < rend) { 509 row = im[i] - rstart; 510 lastcol1 = -1; 511 rp1 = aj + ai[row]; 512 ap1 = aa + ai[row]; 513 rmax1 = aimax[row]; 514 nrow1 = ailen[row]; 515 low1 = 0; 516 high1 = nrow1; 517 lastcol2 = -1; 518 rp2 = bj + 
bi[row]; 519 ap2 = ba + bi[row]; 520 rmax2 = bimax[row]; 521 nrow2 = bilen[row]; 522 low2 = 0; 523 high2 = nrow2; 524 525 for (j=0; j<n; j++) { 526 if (v) { 527 if (roworiented) value = v[i*n+j]; 528 else value = v[i+j*m]; 529 } else value = 0.0; 530 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 531 if (in[j] >= cstart && in[j] < cend) { 532 col = in[j] - cstart; 533 nonew = a->nonew; 534 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 535 } else if (in[j] < 0) continue; 536 #if defined(PETSC_USE_DEBUG) 537 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 538 #endif 539 else { 540 if (mat->was_assembled) { 541 if (!aij->colmap) { 542 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 543 } 544 #if defined(PETSC_USE_CTABLE) 545 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 546 col--; 547 #else 548 col = aij->colmap[in[j]] - 1; 549 #endif 550 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 551 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 552 col = in[j]; 553 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 554 B = aij->B; 555 b = (Mat_SeqAIJ*)B->data; 556 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 557 rp2 = bj + bi[row]; 558 ap2 = ba + bi[row]; 559 rmax2 = bimax[row]; 560 nrow2 = bilen[row]; 561 low2 = 0; 562 high2 = nrow2; 563 bm = aij->B->rmap->n; 564 ba = b->a; 565 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 566 } else col = in[j]; 567 nonew = b->nonew; 568 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 569 } 570 } 571 } else { 572 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 573 if (!aij->donotstash) { 574 mat->assembled = PETSC_FALSE; 
575 if (roworiented) { 576 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 577 } else { 578 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 579 } 580 } 581 } 582 } 583 PetscFunctionReturn(0); 584 } 585 586 #undef __FUNCT__ 587 #define __FUNCT__ "MatGetValues_MPIAIJ" 588 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 589 { 590 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 591 PetscErrorCode ierr; 592 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 593 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 594 595 PetscFunctionBegin; 596 for (i=0; i<m; i++) { 597 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 598 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 599 if (idxm[i] >= rstart && idxm[i] < rend) { 600 row = idxm[i] - rstart; 601 for (j=0; j<n; j++) { 602 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 603 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 604 if (idxn[j] >= cstart && idxn[j] < cend) { 605 col = idxn[j] - cstart; 606 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 607 } else { 608 if (!aij->colmap) { 609 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 610 } 611 #if defined(PETSC_USE_CTABLE) 612 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 613 col--; 614 #else 615 col = aij->colmap[idxn[j]] - 1; 616 #endif 617 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 618 else { 619 ierr = 
MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 620 } 621 } 622 } 623 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 624 } 625 PetscFunctionReturn(0); 626 } 627 628 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 629 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 632 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 633 { 634 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 635 PetscErrorCode ierr; 636 PetscInt nstash,reallocs; 637 InsertMode addv; 638 639 PetscFunctionBegin; 640 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 641 642 /* make sure all processors are either in INSERTMODE or ADDMODE */ 643 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 644 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 645 mat->insertmode = addv; /* in case this processor had no cache */ 646 647 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 648 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 649 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 650 PetscFunctionReturn(0); 651 } 652 653 #undef __FUNCT__ 654 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 655 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 656 { 657 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 658 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 659 PetscErrorCode ierr; 660 PetscMPIInt n; 661 PetscInt i,j,rstart,ncols,flg; 662 PetscInt *row,*col; 663 PetscBool other_disassembled; 664 PetscScalar *val; 665 InsertMode addv = mat->insertmode; 666 667 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 668 669 PetscFunctionBegin; 670 if (!aij->donotstash && 
!mat->nooffprocentries) { 671 while (1) { 672 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 673 if (!flg) break; 674 675 for (i=0; i<n; ) { 676 /* Now identify the consecutive vals belonging to the same row */ 677 for (j=i,rstart=row[j]; j<n; j++) { 678 if (row[j] != rstart) break; 679 } 680 if (j < n) ncols = j-i; 681 else ncols = n-i; 682 /* Now assemble all these values with a single function call */ 683 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 684 685 i = j; 686 } 687 } 688 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 689 } 690 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 691 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 692 693 /* determine if any processor has disassembled, if so we must 694 also disassemble ourselfs, in order that we may reassemble. */ 695 /* 696 if nonzero structure of submatrix B cannot change then we know that 697 no processor disassembled thus we can skip this stuff 698 */ 699 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 700 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 701 if (mat->was_assembled && !other_disassembled) { 702 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 703 } 704 } 705 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 706 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 707 } 708 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 709 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 710 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 711 712 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 713 714 aij->rowvalues = 0; 715 716 /* used by MatAXPY() */ 717 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 718 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 719 720 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 721 if (a->inode.size) 
mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 722 PetscFunctionReturn(0); 723 } 724 725 #undef __FUNCT__ 726 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 727 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 728 { 729 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 730 PetscErrorCode ierr; 731 732 PetscFunctionBegin; 733 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 734 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 735 PetscFunctionReturn(0); 736 } 737 738 #undef __FUNCT__ 739 #define __FUNCT__ "MatZeroRows_MPIAIJ" 740 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 741 { 742 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 743 PetscInt *owners = A->rmap->range; 744 PetscInt n = A->rmap->n; 745 PetscMPIInt size = mat->size; 746 PetscSF sf; 747 PetscInt *lrows; 748 PetscSFNode *rrows; 749 PetscInt lastidx = -1, r, p = 0, len = 0; 750 PetscErrorCode ierr; 751 752 PetscFunctionBegin; 753 /* Create SF where leaves are input rows and roots are owned rows */ 754 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 755 for (r = 0; r < n; ++r) lrows[r] = -1; 756 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 757 for (r = 0; r < N; ++r) { 758 const PetscInt idx = rows[r]; 759 PetscBool found = PETSC_FALSE; 760 /* Trick for efficient searching for sorted rows */ 761 if (lastidx > idx) p = 0; 762 lastidx = idx; 763 for (; p < size; ++p) { 764 if (idx >= owners[p] && idx < owners[p+1]) { 765 rrows[r].rank = p; 766 rrows[r].index = rows[r] - owners[p]; 767 found = PETSC_TRUE; 768 break; 769 } 770 } 771 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 772 } 773 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 774 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 775 /* Collect flags for rows to be zeroed */ 776 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 777 ierr = 
PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 778 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 779 /* Compress and put in row numbers */ 780 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 781 /* fix right hand side if needed */ 782 if (x && b) { 783 const PetscScalar *xx; 784 PetscScalar *bb; 785 786 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 787 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 788 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 789 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 790 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 791 } 792 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 793 ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr); 794 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 795 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 796 } else if (diag != 0.0) { 797 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 798 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 799 for (r = 0; r < len; ++r) { 800 const PetscInt row = lrows[r] + A->rmap->rstart; 801 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 802 } 803 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 804 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 805 } else { 806 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 807 } 808 ierr = PetscFree(lrows);CHKERRQ(ierr); 809 PetscFunctionReturn(0); 810 } 811 812 #undef __FUNCT__ 813 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 814 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 815 { 816 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 817 PetscErrorCode 
ierr; 818 PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1; 819 PetscInt i,j,r,m,p = 0,len; 820 PetscInt *lrows,*owners = A->rmap->range; 821 PetscSFNode *rrows; 822 PetscSF sf; 823 const PetscScalar *xx; 824 PetscScalar *bb,*mask; 825 Vec xmask,lmask; 826 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 827 const PetscInt *aj, *ii,*ridx; 828 PetscScalar *aa; 829 #if defined(PETSC_DEBUG) 830 PetscBool found = PETSC_FALSE; 831 #endif 832 833 PetscFunctionBegin; 834 /* Create SF where leaves are input rows and roots are owned rows */ 835 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 836 for (r = 0; r < n; ++r) lrows[r] = -1; 837 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 838 for (r = 0; r < N; ++r) { 839 const PetscInt idx = rows[r]; 840 PetscBool found = PETSC_FALSE; 841 /* Trick for efficient searching for sorted rows */ 842 if (lastidx > idx) p = 0; 843 lastidx = idx; 844 for (; p < size; ++p) { 845 if (idx >= owners[p] && idx < owners[p+1]) { 846 rrows[r].rank = p; 847 rrows[r].index = rows[r] - owners[p]; 848 found = PETSC_TRUE; 849 break; 850 } 851 } 852 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 853 } 854 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 855 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 856 /* Collect flags for rows to be zeroed */ 857 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 858 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 859 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 860 /* Compress and put in row numbers */ 861 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 862 /* zero diagonal part of matrix */ 863 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 864 /* handle off diagonal part of matrix */ 865 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 866 ierr = 
VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 867 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 868 for (i=0; i<len; i++) bb[lrows[i]] = 1; 869 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 870 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 873 if (x) { 874 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 875 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 876 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 877 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 878 } 879 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 880 /* remove zeroed rows of off diagonal matrix */ 881 ii = aij->i; 882 for (i=0; i<len; i++) { 883 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 884 } 885 /* loop over all elements of off process part of matrix zeroing removed columns*/ 886 if (aij->compressedrow.use) { 887 m = aij->compressedrow.nrows; 888 ii = aij->compressedrow.i; 889 ridx = aij->compressedrow.rindex; 890 for (i=0; i<m; i++) { 891 n = ii[i+1] - ii[i]; 892 aj = aij->j + ii[i]; 893 aa = aij->a + ii[i]; 894 895 for (j=0; j<n; j++) { 896 if (PetscAbsScalar(mask[*aj])) { 897 if (b) bb[*ridx] -= *aa*xx[*aj]; 898 *aa = 0.0; 899 } 900 aa++; 901 aj++; 902 } 903 ridx++; 904 } 905 } else { /* do not use compressed row format */ 906 m = l->B->rmap->n; 907 for (i=0; i<m; i++) { 908 n = ii[i+1] - ii[i]; 909 aj = aij->j + ii[i]; 910 aa = aij->a + ii[i]; 911 for (j=0; j<n; j++) { 912 if (PetscAbsScalar(mask[*aj])) { 913 if (b) bb[i] -= *aa*xx[*aj]; 914 *aa = 0.0; 915 } 916 aa++; 917 aj++; 918 } 919 } 920 } 921 if (x) { 922 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 923 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 924 } 925 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 926 ierr 
= VecDestroy(&lmask);CHKERRQ(ierr); 927 ierr = PetscFree(lrows);CHKERRQ(ierr); 928 PetscFunctionReturn(0); 929 } 930 931 #undef __FUNCT__ 932 #define __FUNCT__ "MatMult_MPIAIJ" 933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 934 { 935 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 936 PetscErrorCode ierr; 937 PetscInt nt; 938 939 PetscFunctionBegin; 940 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 941 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 942 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 943 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 944 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 945 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 946 PetscFunctionReturn(0); 947 } 948 949 #undef __FUNCT__ 950 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 951 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 952 { 953 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 954 PetscErrorCode ierr; 955 956 PetscFunctionBegin; 957 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 958 PetscFunctionReturn(0); 959 } 960 961 #undef __FUNCT__ 962 #define __FUNCT__ "MatMultAdd_MPIAIJ" 963 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 964 { 965 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 966 PetscErrorCode ierr; 967 968 PetscFunctionBegin; 969 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 970 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 971 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 972 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 973 PetscFunctionReturn(0); 974 } 975 976 #undef __FUNCT__ 977 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 978 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 979 { 980 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 981 PetscErrorCode 
ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
   Tests whether Bmat == Amat^T (to tolerance tol).  First compares the local
   diagonal blocks; only if those match (and size > 1) does it extract and compare
   the off-diagonal blocks via MatGetSubMatrices.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  /* 'notme' lists all columns NOT owned by this process ([0,first) and [last,M)) */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
/* z = y + A^T*x: off-diagonal transpose product is scattered (reverse, add) into z
   while the local transpose-add runs */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat
A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  /* with matching row/col partitions, the global diagonal lies in the local block a->A */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
/* Scale the whole parallel matrix by 'aa' (both local diagonal and off-diagonal blocks) */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/* Release all resources owned by an MPIAIJ matrix: stash, sub-blocks, column map,
   ghost vector/scatter, cached row work arrays, and the composed methods */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
/* Write the parallel matrix to a binary viewer: rank 0 writes the header and streams
   row lengths, column indices and values from all ranks under flow control */
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr =
MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* this (rank 0) buffer must be as large as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  /* merge B-part (columns < cstart), then A-part (local columns), then the rest of B,
     producing globally increasing column indices per row */
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      /* NOTE(review): the message prints nz but the check is on rnz -- consider printing rnz */
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  /* same B/A/B merge order as for the column indices above */
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/* Dispatch viewing to ASCII info formats, binary, draw or socket; for full output on
   multiple processes the entire matrix is gathered onto process 0 first */
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor.
*/
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar   *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
      if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
    }

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part: column indices are temporarily shifted to GLOBAL
       numbering in place, then shifted back after the MatSetValues calls */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part: translate compact local column indices through garray */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
      PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
/* Top-level viewer dispatch for MPIAIJ: supported viewer types are ASCII, draw,
   binary and socket; anything else is silently ignored */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
/*
   Parallel (local-block) SOR: each outer iteration refreshes the ghost values of xx,
   forms bb1 = bb - B*lvec, and runs the requested local sweep on the diagonal block.
   Only the SOR_LOCAL_* variants and Eisenstat are supported in parallel.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is only needed when more than one outer iteration (or a nonzero initial
     guess, or Eisenstat) requires the updated right-hand side */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    /* lazily build and cache the diagonal used by the Eisenstat trick */
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
/*
   B = P*A*Q^T for permutations rowp, colp: inverts the row and column permutations
   with PetscSF reductions, broadcasts per-row nonzero counts for preallocation, and
   then re-inserts the entries at their permuted locations.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr =
PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1567 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1568 1569 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1570 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1571 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1572 1573 /* Find out where my gcols should go */ 1574 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1575 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1576 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1577 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1578 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1579 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1580 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1581 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1582 1583 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1584 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1585 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1586 for (i=0; i<m; i++) { 1587 PetscInt row = rdest[i],rowner; 1588 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1589 for (j=ai[i]; j<ai[i+1]; j++) { 1590 PetscInt cowner,col = cdest[aj[j]]; 1591 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1592 if (rowner == cowner) dnnz[i]++; 1593 else onnz[i]++; 1594 } 1595 for (j=bi[i]; j<bi[i+1]; j++) { 1596 PetscInt cowner,col = gcdest[bj[j]]; 1597 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1598 if (rowner == cowner) dnnz[i]++; 1599 else onnz[i]++; 1600 } 1601 } 1602 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1603 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1604 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1605 ierr = 
PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1606 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1607 1608 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1609 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1610 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1611 for (i=0; i<m; i++) { 1612 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1613 PetscInt rowlen; 1614 rowlen = ai[i+1] - ai[i]; 1615 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1616 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1617 rowlen = bi[i+1] - bi[i]; 1618 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1619 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1620 } 1621 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1622 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1623 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1624 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1625 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1626 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1627 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1628 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1629 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1630 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1631 *B = Aperm; 1632 PetscFunctionReturn(0); 1633 } 1634 1635 #undef __FUNCT__ 1636 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1637 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1638 { 1639 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1640 Mat A = mat->A,B = mat->B; 1641 PetscErrorCode ierr; 1642 PetscReal isend[5],irecv[5]; 1643 1644 PetscFunctionBegin; 1645 info->block_size = 1.0; 1646 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1647 
1648 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1649 isend[3] = info->memory; isend[4] = info->mallocs; 1650 1651 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1652 1653 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1654 isend[3] += info->memory; isend[4] += info->mallocs; 1655 if (flag == MAT_LOCAL) { 1656 info->nz_used = isend[0]; 1657 info->nz_allocated = isend[1]; 1658 info->nz_unneeded = isend[2]; 1659 info->memory = isend[3]; 1660 info->mallocs = isend[4]; 1661 } else if (flag == MAT_GLOBAL_MAX) { 1662 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1663 1664 info->nz_used = irecv[0]; 1665 info->nz_allocated = irecv[1]; 1666 info->nz_unneeded = irecv[2]; 1667 info->memory = irecv[3]; 1668 info->mallocs = irecv[4]; 1669 } else if (flag == MAT_GLOBAL_SUM) { 1670 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1671 1672 info->nz_used = irecv[0]; 1673 info->nz_allocated = irecv[1]; 1674 info->nz_unneeded = irecv[2]; 1675 info->memory = irecv[3]; 1676 info->mallocs = irecv[4]; 1677 } 1678 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1679 info->fill_ratio_needed = 0; 1680 info->factor_mallocs = 0; 1681 PetscFunctionReturn(0); 1682 } 1683
/* MatSetOption_MPIAIJ - forwards options to the local diagonal (a->A) and off-diagonal
   (a->B) blocks.  Pattern/allocation options go to both blocks; symmetry-type options
   (MAT_SYMMETRIC, MAT_HERMITIAN, ...) are applied only to the diagonal block; MAT_SPD also
   marks the parallel matrix symmetric and structurally symmetric; unknown options error. */
1684 #undef __FUNCT__ 1685 #define __FUNCT__ "MatSetOption_MPIAIJ" 1686 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1687 { 1688 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1689 PetscErrorCode ierr; 1690 1691 PetscFunctionBegin; 1692 switch (op) { 1693 case MAT_NEW_NONZERO_LOCATIONS: 1694 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1695 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1696 case MAT_KEEP_NONZERO_PATTERN: 1697 case MAT_NEW_NONZERO_LOCATION_ERR: 1698 case MAT_USE_INODES: 1699 case MAT_IGNORE_ZERO_ENTRIES: 1700 MatCheckPreallocated(A,1); 1701 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1702
ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1703 break; 1704 case MAT_ROW_ORIENTED: 1705 a->roworiented = flg; 1706 1707 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1708 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1709 break; 1710 case MAT_NEW_DIAGONALS: 1711 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1712 break; 1713 case MAT_IGNORE_OFF_PROC_ENTRIES: 1714 a->donotstash = flg; 1715 break; 1716 case MAT_SPD: 1717 A->spd_set = PETSC_TRUE; 1718 A->spd = flg; 1719 if (flg) { 1720 A->symmetric = PETSC_TRUE; 1721 A->structurally_symmetric = PETSC_TRUE; 1722 A->symmetric_set = PETSC_TRUE; 1723 A->structurally_symmetric_set = PETSC_TRUE; 1724 } 1725 break; 1726 case MAT_SYMMETRIC: 1727 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1728 break; 1729 case MAT_STRUCTURALLY_SYMMETRIC: 1730 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1731 break; 1732 case MAT_HERMITIAN: 1733 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1734 break; 1735 case MAT_SYMMETRY_ETERNAL: 1736 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1737 break; 1738 default: 1739 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1740 } 1741 PetscFunctionReturn(0); 1742 } 1743
/* MatGetRow_MPIAIJ - returns one locally owned row (global index 'row' in
   [rstart,rend)), merging the diagonal (A) and off-diagonal (B) parts into globally
   sorted column order: B-columns left of cstart first, then A shifted by cstart, then the
   remaining B-columns.  Work buffers rowvalues/rowindices are sized once from the longest
   local row; a second call without MatRestoreRow errors (getrowactive guard). */
1744 #undef __FUNCT__ 1745 #define __FUNCT__ "MatGetRow_MPIAIJ" 1746 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1747 { 1748 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1749 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1750 PetscErrorCode ierr; 1751 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1752 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1753 PetscInt *cmap,*idx_p; 1754 1755 PetscFunctionBegin; 1756 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1757 mat->getrowactive = PETSC_TRUE; 1758 1759 if (!mat->rowvalues && (idx || v)) { 1760 /* 1761 allocate enough space to hold information from the longest row.
*/ 1762 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1763 PetscInt max = 1,tmp; 1764 for (i=0; i<matin->rmap->n; i++) { 1765 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1766 if (max < tmp) max = tmp; 1767 } 1768 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1769 } 1770 1771 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1772 lrow = row - rstart; 1773 1774 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1775 if (!v) {pvA = 0; pvB = 0;} 1776 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1777 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1778 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1779 nztot = nzA + nzB; 1780 1781 cmap = mat->garray; 1782 if (v || idx) { 1783 if (nztot) { 1784 /* Sort by increasing column numbers, assuming A and B already sorted */ 1785 PetscInt imark = -1; 1786 if (v) { 1787 *v = v_p = mat->rowvalues; 1788 for (i=0; i<nzB; i++) { 1789 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1790 else break; 1791 } 1792 imark = i; 1793 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1794 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1795 } 1796 if (idx) { 1797 *idx = idx_p = mat->rowindices; 1798 if (imark > -1) { 1799 for (i=0; i<imark; i++) { 1800 idx_p[i] = cmap[cworkB[i]]; 1801 } 1802 } else { 1803 for (i=0; i<nzB; i++) { 1804 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1805 else break; 1806 } 1807 imark = i; 1808 } 1809 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1810 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1811 } 1812 } else { 1813 if (idx) *idx = 0; 1814 if (v) *v = 0; 1815 } 1816 } 1817 *nz = nztot; 1818 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1819 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1820 PetscFunctionReturn(0); 1821 } 1822 1823 1824 #undef
__FUNCT__ 1825 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
/* MatRestoreRow_MPIAIJ - clears the getrowactive flag set by MatGetRow_MPIAIJ; the
   shared rowvalues/rowindices work buffers are retained for reuse, not freed here. */
1826 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1827 { 1828 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1829 1830 PetscFunctionBegin; 1831 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1832 aij->getrowactive = PETSC_FALSE; 1833 PetscFunctionReturn(0); 1834 } 1835
/* MatNorm_MPIAIJ - parallel norms over the A (diagonal) and B (off-diagonal) blocks.
   NORM_FROBENIUS: sum |a_ij|^2 locally, MPI_Allreduce(SUM), then sqrt.
   NORM_1: accumulate per-global-column absolute sums in a length-cmap->N buffer
   (off-diagonal columns mapped through garray), Allreduce(SUM), take the max.
   NORM_INFINITY: max over local rows of the row absolute sum, Allreduce(MAX).
   The 2-norm is unsupported; single-rank matrices delegate to the sequential MatNorm. */
1836 #undef __FUNCT__ 1837 #define __FUNCT__ "MatNorm_MPIAIJ" 1838 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1839 { 1840 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1841 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1842 PetscErrorCode ierr; 1843 PetscInt i,j,cstart = mat->cmap->rstart; 1844 PetscReal sum = 0.0; 1845 MatScalar *v; 1846 1847 PetscFunctionBegin; 1848 if (aij->size == 1) { 1849 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1850 } else { 1851 if (type == NORM_FROBENIUS) { 1852 v = amat->a; 1853 for (i=0; i<amat->nz; i++) { 1854 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1855 } 1856 v = bmat->a; 1857 for (i=0; i<bmat->nz; i++) { 1858 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1859 } 1860 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1861 *norm = PetscSqrtReal(*norm); 1862 } else if (type == NORM_1) { /* max column norm */ 1863 PetscReal *tmp,*tmp2; 1864 PetscInt *jj,*garray = aij->garray; 1865 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1866 ierr = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1867 *norm = 0.0; 1868 v = amat->a; jj = amat->j; 1869 for (j=0; j<amat->nz; j++) { 1870 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1871 } 1872 v = bmat->a; jj = bmat->j; 1873 for (j=0; j<bmat->nz; j++) { 1874 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1875 } 1876 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1877
for (j=0; j<mat->cmap->N; j++) { 1878 if (tmp2[j] > *norm) *norm = tmp2[j]; 1879 } 1880 ierr = PetscFree(tmp);CHKERRQ(ierr); 1881 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1882 } else if (type == NORM_INFINITY) { /* max row norm */ 1883 PetscReal ntemp = 0.0; 1884 for (j=0; j<aij->A->rmap->n; j++) { 1885 v = amat->a + amat->i[j]; 1886 sum = 0.0; 1887 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1888 sum += PetscAbsScalar(*v); v++; 1889 } 1890 v = bmat->a + bmat->i[j]; 1891 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1892 sum += PetscAbsScalar(*v); v++; 1893 } 1894 if (sum > ntemp) ntemp = sum; 1895 } 1896 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1897 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1898 } 1899 PetscFunctionReturn(0); 1900 } 1901
/* MatTranspose_MPIAIJ - forms B = A^T.  For MAT_INITIAL_MATRIX (or in-place) the
   transpose's diagonal/off-diagonal row counts are computed first (d_nnz via a histogram
   of A's local column indices; o_nnz via a PetscSF reduction of the off-diagonal column
   histogram mapped through garray), then rows are inserted column-by-column with
   MatSetValues.  NOTE: Aloc->j is temporarily shifted to global indices (aj[i] += cstart)
   and restored afterwards (aj[i] -= cstart), so the routine is not reentrant on A. */
1902 #undef __FUNCT__ 1903 #define __FUNCT__ "MatTranspose_MPIAIJ" 1904 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1905 { 1906 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1907 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1908 PetscErrorCode ierr; 1909 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1910 PetscInt cstart = A->cmap->rstart,ncol; 1911 Mat B; 1912 MatScalar *array; 1913 1914 PetscFunctionBegin; 1915 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1916 1917 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1918 ai = Aloc->i; aj = Aloc->j; 1919 bi = Bloc->i; bj = Bloc->j; 1920 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1921 PetscInt *d_nnz,*g_nnz,*o_nnz; 1922 PetscSFNode *oloc; 1923 PETSC_UNUSED PetscSF sf; 1924 1925 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1926 /* compute d_nnz for preallocation */ 1927 ierr =
PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1928 for (i=0; i<ai[ma]; i++) { 1929 d_nnz[aj[i]]++; 1930 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1931 } 1932 /* compute local off-diagonal contributions */ 1933 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1934 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1935 /* map those to global */ 1936 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1937 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1938 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1939 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1940 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1941 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1942 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1943 1944 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1945 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1946 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 1947 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1948 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1949 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1950 } else { 1951 B = *matout; 1952 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1953 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1954 } 1955 1956 /* copy over the A part */ 1957 array = Aloc->a; 1958 row = A->rmap->rstart; 1959 for (i=0; i<ma; i++) { 1960 ncol = ai[i+1]-ai[i]; 1961 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1962 row++; 1963 array += ncol; aj += ncol; 1964 } 1965 aj = Aloc->j; 1966 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1967 1968 /* copy over the B part */ 1969 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1970 array =
Bloc->a; 1971 row = A->rmap->rstart; 1972 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1973 cols_tmp = cols; 1974 for (i=0; i<mb; i++) { 1975 ncol = bi[i+1]-bi[i]; 1976 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1977 row++; 1978 array += ncol; cols_tmp += ncol; 1979 } 1980 ierr = PetscFree(cols);CHKERRQ(ierr); 1981 1982 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1983 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1984 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1985 *matout = B; 1986 } else { 1987 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1988 } 1989 PetscFunctionReturn(0); 1990 } 1991
/* MatDiagonalScale_MPIAIJ - computes mat = diag(ll)*mat*diag(rr).  The scatter of rr
   into the local ghost vector (lvec) is started first so communication overlaps with the
   left-scaling of the off-diagonal block and the scaling of the diagonal block; the
   scatter is completed only when the right-scaled off-diagonal values are needed.  Vector
   local sizes are validated against the matrix's local row/column sizes. */
1992 #undef __FUNCT__ 1993 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1994 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1995 { 1996 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1997 Mat a = aij->A,b = aij->B; 1998 PetscErrorCode ierr; 1999 PetscInt s1,s2,s3; 2000 2001 PetscFunctionBegin; 2002 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2003 if (rr) { 2004 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2005 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2006 /* Overlap communication with computation.
*/ 2007 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2008 } 2009 if (ll) { 2010 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2011 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2012 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2013 } 2014 /* scale the diagonal block */ 2015 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2016 2017 if (rr) { 2018 /* Do a scatter end and then right scale the off-diagonal block */ 2019 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2020 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2021 } 2022 PetscFunctionReturn(0); 2023 } 2024
/* MatSetUnfactored_MPIAIJ - resets the factored state.
   NOTE(review): only the diagonal block a->A is reset here; presumably the off-diagonal
   block is never factored in this code path -- confirm against the factorization code. */
2025 #undef __FUNCT__ 2026 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2027 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2028 { 2029 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2030 PetscErrorCode ierr; 2031 2032 PetscFunctionBegin; 2033 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2034 PetscFunctionReturn(0); 2035 } 2036
/* MatEqual_MPIAIJ - compares A and B blockwise (diagonal parts, then off-diagonal parts
   only if the first comparison succeeded) and combines the local verdicts with a logical
   AND reduction so all ranks return the same answer. */
2037 #undef __FUNCT__ 2038 #define __FUNCT__ "MatEqual_MPIAIJ" 2039 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2040 { 2041 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2042 Mat a,b,c,d; 2043 PetscBool flg; 2044 PetscErrorCode ierr; 2045 2046 PetscFunctionBegin; 2047 a = matA->A; b = matA->B; 2048 c = matB->A; d = matB->B; 2049 2050 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2051 if (flg) { 2052 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2053 } 2054 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2055 PetscFunctionReturn(0); 2056 } 2057
/* MatCopy_MPIAIJ - copies A into B.  The fast blockwise path requires SAME_NONZERO_PATTERN
   and identical copy implementations; otherwise falls back to MatCopy_Basic (see the
   in-code note about column compression in the off-diagonal blocks). */
2058 #undef __FUNCT__ 2059 #define __FUNCT__ "MatCopy_MPIAIJ" 2060 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2061 { 2062 PetscErrorCode ierr; 2063 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2064 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2065 2066 PetscFunctionBegin; 2067 /* If the two matrices
don't have the same copy implementation, they aren't compatible for fast copy. */ 2068 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2069 /* because of the column compression in the off-processor part of the matrix a->B, 2070 the number of columns in a->B and b->B may be different, hence we cannot call 2071 the MatCopy() directly on the two parts. If need be, we can provide a more 2072 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2073 then copying the submatrices */ 2074 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2075 } else { 2076 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2077 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2078 } 2079 PetscFunctionReturn(0); 2080 } 2081
/* MatSetUp_MPIAIJ - default setup: preallocate with PETSC_DEFAULT row counts. */
2082 #undef __FUNCT__ 2083 #define __FUNCT__ "MatSetUp_MPIAIJ" 2084 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2085 { 2086 PetscErrorCode ierr; 2087 2088 PetscFunctionBegin; 2089 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2090 PetscFunctionReturn(0); 2091 } 2092
/* MatAXPYGetPreallocation_MPIAIJ - per-row nonzero counts of the union of X's and Y's
   sparsity patterns, comparing column indices through the supplied local-to-global maps
   (merge of two sorted index lists per row).
   NOTE(review): m is taken from Y->rmap->N; Y here is a sequential (off-diagonal) block
   where global and local row counts presumably coincide -- confirm at the call site. */
2093 #undef __FUNCT__ 2094 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2095 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2096 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2097 { 2098 PetscInt i,m=Y->rmap->N; 2099 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2100 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2101 const PetscInt *xi = x->i,*yi = y->i; 2102 2103 PetscFunctionBegin; 2104 /* Set the number of nonzeros in the new matrix */ 2105 for (i=0; i<m; i++) { 2106 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2107 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2108 nnz[i] = 0; 2109 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2110 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2111 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2112 nnz[i]++; 2113
} 2114 for (; k<nzy; k++) nnz[i]++; 2115 } 2116 PetscFunctionReturn(0); 2117 } 2118
/* MatAXPY_MPIAIJ - Y += a*X.  Three paths:
   SAME_NONZERO_PATTERN: direct BLAS axpy on the value arrays of both blocks;
   SUBSET_NONZERO_PATTERN: sequential AXPY on the diagonal block, and a cached xtoy index
   map (rebuilt when the cached XtoY matrix differs from xx->B) for the off-diagonal block;
   otherwise: build a fresh preallocated matrix for the union pattern, compute
   Y = a*X + Y with MatAXPY_BasicWithPreallocation, and replace Y's header. */
2119 #undef __FUNCT__ 2120 #define __FUNCT__ "MatAXPY_MPIAIJ" 2121 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2122 { 2123 PetscErrorCode ierr; 2124 PetscInt i; 2125 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2126 PetscBLASInt bnz,one=1; 2127 Mat_SeqAIJ *x,*y; 2128 2129 PetscFunctionBegin; 2130 if (str == SAME_NONZERO_PATTERN) { 2131 PetscScalar alpha = a; 2132 x = (Mat_SeqAIJ*)xx->A->data; 2133 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2134 y = (Mat_SeqAIJ*)yy->A->data; 2135 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2136 x = (Mat_SeqAIJ*)xx->B->data; 2137 y = (Mat_SeqAIJ*)yy->B->data; 2138 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2139 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2140 } else if (str == SUBSET_NONZERO_PATTERN) { 2141 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2142 2143 x = (Mat_SeqAIJ*)xx->B->data; 2144 y = (Mat_SeqAIJ*)yy->B->data; 2145 if (y->xtoy && y->XtoY != xx->B) { 2146 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2147 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2148 } 2149 if (!y->xtoy) { /* get xtoy */ 2150 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2151 y->XtoY = xx->B; 2152 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2153 } 2154 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2155 } else { 2156 Mat B; 2157 PetscInt *nnz_d,*nnz_o; 2158 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2159 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2160 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2161 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2162 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2163 ierr =
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2164 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2165 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2166 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2167 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2168 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2169 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2170 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2171 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2172 } 2173 PetscFunctionReturn(0); 2174 } 2175 2176 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2177
/* MatConjugate_MPIAIJ - complex-conjugates both blocks; no-op for real scalars. */
2178 #undef __FUNCT__ 2179 #define __FUNCT__ "MatConjugate_MPIAIJ" 2180 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2181 { 2182 #if defined(PETSC_USE_COMPLEX) 2183 PetscErrorCode ierr; 2184 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2185 2186 PetscFunctionBegin; 2187 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2188 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2189 #else 2190 PetscFunctionBegin; 2191 #endif 2192 PetscFunctionReturn(0); 2193 } 2194
/* MatRealPart_MPIAIJ - keeps only the real part of both blocks' values. */
2195 #undef __FUNCT__ 2196 #define __FUNCT__ "MatRealPart_MPIAIJ" 2197 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2198 { 2199 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2200 PetscErrorCode ierr; 2201 2202 PetscFunctionBegin; 2203 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2204 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2205 PetscFunctionReturn(0); 2206 } 2207
/* MatImaginaryPart_MPIAIJ - keeps only the imaginary part of both blocks' values. */
2208 #undef __FUNCT__ 2209 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2210 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2211 { 2212 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2213 PetscErrorCode ierr; 2214 2215 PetscFunctionBegin; 2216 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2217 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2218 PetscFunctionReturn(0); 2219 } 2220
/* The following section (through the matching #endif) is compiled only when PETSc is
   configured with the Parallel Boost Graph Library (PBGL); it implements a boost-based
   parallel ILU(0) factorization and solve.  Note the code is C++ (namespaces, templates)
   despite the .c-style surroundings. */
2221 #if defined(PETSC_HAVE_PBGL) 2222 2223 #include <boost/parallel/mpi/bsp_process_group.hpp> 2224 #include
<boost/graph/distributed/ilu_default_graph.hpp> 2225 #include <boost/graph/distributed/ilu_0_block.hpp> 2226 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2227 #include <boost/graph/distributed/petsc/interface.hpp> 2228 #include <boost/multi_array.hpp> 2229 #include <boost/parallel/distributed_property_map->hpp> 2230
/* MatILUFactorSymbolic_MPIAIJ (PBGL) - parallel ILU symbolic factorization via boost;
   only levels==0 and identity row/column permutations are supported.  The boost level
   graph is allocated on the heap and attached to the factored matrix as the "graph"
   composed PetscContainer, from which MatSolve_MPIAIJ later retrieves it.
   NOTE(review): the PetscContainer calls at the end discard their error codes (ierr is
   assigned but never checked with CHKERRQ) -- confirm intentional. */
2231 #undef __FUNCT__ 2232 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2233 /* 2234 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2235 */ 2236 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2237 { 2238 namespace petsc = boost::distributed::petsc; 2239 2240 namespace graph_dist = boost::graph::distributed; 2241 using boost::graph::distributed::ilu_default::process_group_type; 2242 using boost::graph::ilu_permuted; 2243 2244 PetscBool row_identity, col_identity; 2245 PetscContainer c; 2246 PetscInt m, n, M, N; 2247 PetscErrorCode ierr; 2248 2249 PetscFunctionBegin; 2250 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2251 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2252 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2253 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2254 2255 process_group_type pg; 2256 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2257 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2258 lgraph_type& level_graph = *lgraph_p; 2259 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2260 2261 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2262 ilu_permuted(level_graph); 2263 2264 /* put together the new matrix */ 2265 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2266 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2267 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2268 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2269 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2270 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2271 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2272 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2273 2274 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2275 ierr = PetscContainerSetPointer(c, lgraph_p); 2276 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2277 ierr = PetscContainerDestroy(&c); 2278 PetscFunctionReturn(0); 2279 } 2280
/* MatLUFactorNumeric_MPIAIJ (PBGL) - numeric phase is a no-op: the boost ILU work is
   done in the symbolic phase above. */
2281 #undef __FUNCT__ 2282 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2283 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2284 { 2285 PetscFunctionBegin; 2286 PetscFunctionReturn(0); 2287 } 2288
/* MatSolve_MPIAIJ (PBGL) - retrieves the boost ILU level graph from the matrix's
   "graph" container, wraps the b and x vector arrays as boost property maps, and runs
   ilu_set_solve.  NOTE(review): VecGetArray calls have no matching VecRestoreArray --
   confirm whether the arrays are intentionally left checked out. */
2289 #undef __FUNCT__ 2290 #define __FUNCT__ "MatSolve_MPIAIJ" 2291 /* 2292 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2293 */ 2294 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2295 { 2296 namespace graph_dist = boost::graph::distributed; 2297 2298 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2299 lgraph_type *lgraph_p; 2300 PetscContainer c; 2301 PetscErrorCode ierr; 2302 2303 PetscFunctionBegin; 2304 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2305 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2306 ierr = VecCopy(b, x);CHKERRQ(ierr); 2307 2308 PetscScalar *array_x; 2309 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2310 PetscInt sx; 2311 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2312 2313 PetscScalar *array_b; 2314 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2315 PetscInt sb; 2316 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2317 2318 lgraph_type& level_graph = *lgraph_p; 2319 graph_dist::ilu_default::graph_type& graph(level_graph.graph);
2320 2321 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2322 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2323 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2324 2325 typedef boost::iterator_property_map<array_ref_type::iterator, 2326 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2327 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2328 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2329 2330 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2331 PetscFunctionReturn(0); 2332 } 2333 #endif 2334
/* MatDestroy_MatRedundant - releases the Mat_Redundant bookkeeping attached to a matrix
   built by MatGetRedundantMatrix (looked up on the Seq or MPI data struct depending on
   communicator size).  Two storage variants: matseq (from MatGetSubMatrices) or the raw
   send/recv buffer arrays; finally the saved Destroy callback runs on A itself. */
2335 #undef __FUNCT__ 2336 #define __FUNCT__ "MatDestroy_MatRedundant" 2337 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2338 { 2339 PetscErrorCode ierr; 2340 Mat_Redundant *redund; 2341 PetscInt i; 2342 PetscMPIInt size; 2343 2344 PetscFunctionBegin; 2345 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2346 if (size == 1) { 2347 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2348 redund = a->redundant; 2349 } else { 2350 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2351 redund = a->redundant; 2352 } 2353 if (redund){ 2354 if (redund->matseq) { /* via MatGetSubMatrices() */ 2355 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2356 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2357 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2358 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2359 } else { 2360 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2361 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2362 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2363 for (i=0; i<redund->nrecvs; i++) { 2364 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2365 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2366 } 2367 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2368 } 2369 2370 if (redund->psubcomm) { 2371 ierr =
PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2372 } 2373 ierr = redund->Destroy(A);CHKERRQ(ierr); 2374 ierr = PetscFree(redund);CHKERRQ(ierr); 2375 } 2376 PetscFunctionReturn(0); 2377 } 2378
/* MatGetRedundantMatrix_MPIAIJ_interlaced - assembles a redundant copy of 'mat' on each
   subcommunicator with an interlaced rank assignment.  The routine continues beyond this
   excerpt (its definition is truncated below); only comments are added here. */
2379 #undef __FUNCT__ 2380 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2381 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2382 { 2383 PetscMPIInt rank,size; 2384 MPI_Comm comm; 2385 PetscErrorCode ierr; 2386 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2387 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2388 PetscInt *rowrange = mat->rmap->range; 2389 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2390 Mat A = aij->A,B=aij->B,C=*matredundant; 2391 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2392 PetscScalar *sbuf_a; 2393 PetscInt nzlocal=a->nz+b->nz; 2394 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2395 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2396 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2397 MatScalar *aworkA,*aworkB; 2398 PetscScalar *vals; 2399 PetscMPIInt tag1,tag2,tag3,imdex; 2400 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2401 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2402 MPI_Status recv_status,*send_status; 2403 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2404 PetscInt **rbuf_j=NULL; 2405 PetscScalar **rbuf_a=NULL; 2406 Mat_Redundant *redund =NULL; 2407 2408 PetscFunctionBegin; 2409 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2410 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2411 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2412 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2413 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2414 2415 if (reuse == MAT_REUSE_MATRIX) { 2416 if (M != mat->rmap->N || N != mat->cmap->N)
/* Continuation of MatGetRedundantMatrix_MPIAIJ_interlaced; the function body runs past
   the end of this excerpt, so only explanatory comments are added and no code changed. */
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2417 if (subsize == 1) { 2418 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2419 redund = c->redundant; 2420 } else { 2421 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2422 redund = c->redundant; 2423 } 2424 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2425 2426 nsends = redund->nsends; 2427 nrecvs = redund->nrecvs; 2428 send_rank = redund->send_rank; 2429 recv_rank = redund->recv_rank; 2430 sbuf_nz = redund->sbuf_nz; 2431 rbuf_nz = redund->rbuf_nz; 2432 sbuf_j = redund->sbuf_j; 2433 sbuf_a = redund->sbuf_a; 2434 rbuf_j = redund->rbuf_j; 2435 rbuf_a = redund->rbuf_a; 2436 } 2437 2438 if (reuse == MAT_INITIAL_MATRIX) { 2439 PetscInt nleftover,np_subcomm; 2440 2441 /* get the destination processors' id send_rank, nsends and nrecvs */ 2442 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2443 2444 np_subcomm = size/nsubcomm; 2445 nleftover = size - nsubcomm*np_subcomm; 2446 2447 /* block of codes below is specific for INTERLACED */ 2448 /* ------------------------------------------------*/ 2449 nsends = 0; nrecvs = 0; 2450 for (i=0; i<size; i++) { 2451 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2452 send_rank[nsends++] = i; 2453 recv_rank[nrecvs++] = i; 2454 } 2455 } 2456 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2457 i = size-nleftover-1; 2458 j = 0; 2459 while (j < nsubcomm - nleftover) { 2460 send_rank[nsends++] = i; 2461 i--; j++; 2462 } 2463 } 2464 2465 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2466 for (i=0; i<nleftover; i++) { 2467 recv_rank[nrecvs++] = size-nleftover+i; 2468 } 2469 } 2470 /*----------------------------------------------*/ 2471 2472 /* allocate sbuf_j, sbuf_a */ 2473 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2474 ierr =
PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2475 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2476 /* 2477 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2478 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2479 */ 2480 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2481 2482 /* copy mat's local entries into the buffers */ 2483 if (reuse == MAT_INITIAL_MATRIX) { 2484 rownz_max = 0; 2485 rptr = sbuf_j; 2486 cols = sbuf_j + rend-rstart + 1; 2487 vals = sbuf_a; 2488 rptr[0] = 0; 2489 for (i=0; i<rend-rstart; i++) { 2490 row = i + rstart; 2491 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2492 ncols = nzA + nzB; 2493 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2494 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2495 /* load the column indices for this row into cols */ 2496 lwrite = 0; 2497 for (l=0; l<nzB; l++) { 2498 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2499 vals[lwrite] = aworkB[l]; 2500 cols[lwrite++] = ctmp; 2501 } 2502 } 2503 for (l=0; l<nzA; l++) { 2504 vals[lwrite] = aworkA[l]; 2505 cols[lwrite++] = cstart + cworkA[l]; 2506 } 2507 for (l=0; l<nzB; l++) { 2508 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2509 vals[lwrite] = aworkB[l]; 2510 cols[lwrite++] = ctmp; 2511 } 2512 } 2513 vals += ncols; 2514 cols += ncols; 2515 rptr[i+1] = rptr[i] + ncols; 2516 if (rownz_max < ncols) rownz_max = ncols; 2517 } 2518 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz) /* NOTE(review): the message reads rptr[rend-rstart+1] while the test checks rptr[rend-rstart] -- confirm which index was intended */; 2519 } else { /* only copy matrix values into sbuf_a */ 2520 rptr = sbuf_j; 2521 vals = sbuf_a; 2522 rptr[0] = 0; 2523 for (i=0; i<rend-rstart; i++) { 2524 row = i + rstart; 2525 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2526 ncols = nzA + nzB; 2527 cworkB = b->j + b->i[i]; 2528 aworkA = a->a + a->i[i]; 2529 aworkB = b->a + b->i[i]; 2530 lwrite = 0; 2531 for (l=0; l<nzB; l++) { 2532 if ((ctmp =
bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2533 } 2534 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2535 for (l=0; l<nzB; l++) { 2536 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2537 } 2538 vals += ncols; 2539 rptr[i+1] = rptr[i] + ncols; 2540 } 2541 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2542 2543 /* send nzlocal to others, and recv other's nzlocal */ 2544 /*--------------------------------------------------*/ 2545 if (reuse == MAT_INITIAL_MATRIX) { 2546 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2547 2548 s_waits2 = s_waits3 + nsends; 2549 s_waits1 = s_waits2 + nsends; 2550 r_waits1 = s_waits1 + nsends; 2551 r_waits2 = r_waits1 + nrecvs; 2552 r_waits3 = r_waits2 + nrecvs; 2553 } else { 2554 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2555 2556 r_waits3 = s_waits3 + nsends; 2557 } 2558 2559 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2560 if (reuse == MAT_INITIAL_MATRIX) { 2561 /* get new tags to keep the communication clean */ 2562 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2563 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2564 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2565 2566 /* post receives of other's nzlocal */ 2567 for (i=0; i<nrecvs; i++) { 2568 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2569 } 2570 /* send nzlocal to others */ 2571 for (i=0; i<nsends; i++) { 2572 sbuf_nz[i] = nzlocal; 2573 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2574 } 2575 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2576 count = nrecvs; 2577 while (count) { 2578 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2579 2580 recv_rank[imdex] = recv_status.MPI_SOURCE; 2581 /* allocate rbuf_a and
rbuf_j; then post receives of rbuf_j */ 2582 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2583 2584 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2585 2586 rbuf_nz[imdex] += i + 2; 2587 2588 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2589 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2590 count--; 2591 } 2592 /* wait on sends of nzlocal */ 2593 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2594 /* send mat->i,j to others, and recv from other's */ 2595 /*------------------------------------------------*/ 2596 for (i=0; i<nsends; i++) { 2597 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2598 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2599 } 2600 /* wait on receives of mat->i,j */ 2601 /*------------------------------*/ 2602 count = nrecvs; 2603 while (count) { 2604 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2605 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2606 count--; 2607 } 2608 /* wait on sends of mat->i,j */ 2609 /*---------------------------*/ 2610 if (nsends) { 2611 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2612 } 2613 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2614 2615 /* post receives, send and receive mat->a */ 2616 /*----------------------------------------*/ 2617 for (imdex=0; imdex<nrecvs; imdex++) { 2618 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2619 } 2620 for (i=0; i<nsends; i++) { 2621 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2622 } 2623 count = nrecvs; 2624 while (count) { 2625 ierr = 
MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2626 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2627 count--; 2628 } 2629 if (nsends) { 2630 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2631 } 2632 2633 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2634 2635 /* create redundant matrix */ 2636 /*-------------------------*/ 2637 if (reuse == MAT_INITIAL_MATRIX) { 2638 const PetscInt *range; 2639 PetscInt rstart_sub,rend_sub,mloc_sub; 2640 2641 /* compute rownz_max for preallocation */ 2642 for (imdex=0; imdex<nrecvs; imdex++) { 2643 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2644 rptr = rbuf_j[imdex]; 2645 for (i=0; i<j; i++) { 2646 ncols = rptr[i+1] - rptr[i]; 2647 if (rownz_max < ncols) rownz_max = ncols; 2648 } 2649 } 2650 2651 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2652 2653 /* get local size of redundant matrix 2654 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! 
*/ 2655 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2656 rstart_sub = range[nsubcomm*subrank]; 2657 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2658 rend_sub = range[nsubcomm*(subrank+1)]; 2659 } else { 2660 rend_sub = mat->rmap->N; 2661 } 2662 mloc_sub = rend_sub - rstart_sub; 2663 2664 if (M == N) { 2665 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2666 } else { /* non-square matrix */ 2667 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2668 } 2669 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2670 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2671 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2672 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2673 } else { 2674 C = *matredundant; 2675 } 2676 2677 /* insert local matrix entries */ 2678 rptr = sbuf_j; 2679 cols = sbuf_j + rend-rstart + 1; 2680 vals = sbuf_a; 2681 for (i=0; i<rend-rstart; i++) { 2682 row = i + rstart; 2683 ncols = rptr[i+1] - rptr[i]; 2684 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2685 vals += ncols; 2686 cols += ncols; 2687 } 2688 /* insert received matrix entries */ 2689 for (imdex=0; imdex<nrecvs; imdex++) { 2690 rstart = rowrange[recv_rank[imdex]]; 2691 rend = rowrange[recv_rank[imdex]+1]; 2692 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2693 rptr = rbuf_j[imdex]; 2694 cols = rbuf_j[imdex] + rend-rstart + 1; 2695 vals = rbuf_a[imdex]; 2696 for (i=0; i<rend-rstart; i++) { 2697 row = i + rstart; 2698 ncols = rptr[i+1] - rptr[i]; 2699 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2700 vals += ncols; 2701 cols += ncols; 2702 } 2703 } 2704 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2705 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2706 2707 if (reuse == MAT_INITIAL_MATRIX) { 2708 *matredundant = C; 2709 2710 /* 
create a supporting struct and attach it to C for reuse */ 2711 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2712 if (subsize == 1) { 2713 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2714 c->redundant = redund; 2715 } else { 2716 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2717 c->redundant = redund; 2718 } 2719 2720 redund->nzlocal = nzlocal; 2721 redund->nsends = nsends; 2722 redund->nrecvs = nrecvs; 2723 redund->send_rank = send_rank; 2724 redund->recv_rank = recv_rank; 2725 redund->sbuf_nz = sbuf_nz; 2726 redund->rbuf_nz = rbuf_nz; 2727 redund->sbuf_j = sbuf_j; 2728 redund->sbuf_a = sbuf_a; 2729 redund->rbuf_j = rbuf_j; 2730 redund->rbuf_a = rbuf_a; 2731 redund->psubcomm = NULL; 2732 2733 redund->Destroy = C->ops->destroy; 2734 C->ops->destroy = MatDestroy_MatRedundant; 2735 } 2736 PetscFunctionReturn(0); 2737 } 2738 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2741 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2742 { 2743 PetscErrorCode ierr; 2744 MPI_Comm comm; 2745 PetscMPIInt size,subsize; 2746 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2747 Mat_Redundant *redund=NULL; 2748 PetscSubcomm psubcomm=NULL; 2749 MPI_Comm subcomm_in=subcomm; 2750 Mat *matseq; 2751 IS isrow,iscol; 2752 2753 PetscFunctionBegin; 2754 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2755 if (reuse == MAT_INITIAL_MATRIX) { 2756 /* create psubcomm, then get subcomm */ 2757 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2758 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2759 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2760 2761 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2762 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2763 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2764 ierr = 
PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
      subcomm = psubcomm->comm;
    } else { /* retrieve psubcomm and subcomm */
      ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
      ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
      /* the Mat_Redundant struct lives in SeqAIJ or MPIAIJ data depending on subcomm size */
      if (subsize == 1) {
        Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
        redund = c->redundant;
      } else {
        Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
        redund = c->redundant;
      }
      psubcomm = redund->psubcomm;
    }
    if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
      ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
      if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
        ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
        if (subsize == 1) {
          Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
          c->redundant->psubcomm = psubcomm;
        } else {
          Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
          c->redundant->psubcomm = psubcomm;
        }
      }
      PetscFunctionReturn(0);
    }
  }

  /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
  ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* create a local sequential matrix matseq[0] */
    mloc_sub = PETSC_DECIDE;
    ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
    /* prefix-sum of local sizes gives this process's row range on subcomm */
    ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
    rstart = rend - mloc_sub;
    ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
  } else { /* reuse == MAT_REUSE_MATRIX */
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
      redund = c->redundant;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
      redund = c->redundant;
    }

    isrow  = redund->isrow;
    iscol  = redund->iscol;
    matseq = redund->matseq;
  }
  ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
  ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX) {
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
      c->redundant = redund;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
      c->redundant = redund;
    }
    /* keep the index sets and submatrix so MAT_REUSE_MATRIX can skip the setup */
    redund->isrow    = isrow;
    redund->iscol    = iscol;
    redund->matseq   = matseq;
    redund->psubcomm = psubcomm;
    redund->Destroy  = (*matredundant)->ops->destroy;
    (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
/*
   For each local row, finds the entry of largest absolute value; v receives
   the values, and (optionally) idx[] the global column index of each winner.
   The diagonal (a->A) and off-diagonal (a->B) blocks are searched separately
   and merged.
*/
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    /* shift diagonal-block local column indices to global numbering */
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  /* merge: keep the off-diagonal candidate where it beats the diagonal one */
  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] =
a->garray[idxb[i]]; 2870 } 2871 } 2872 2873 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2874 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2875 ierr = PetscFree(idxb);CHKERRQ(ierr); 2876 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2877 PetscFunctionReturn(0); 2878 } 2879 2880 #undef __FUNCT__ 2881 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2882 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2883 { 2884 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2885 PetscErrorCode ierr; 2886 PetscInt i,*idxb = 0; 2887 PetscScalar *va,*vb; 2888 Vec vtmp; 2889 2890 PetscFunctionBegin; 2891 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2892 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2893 if (idx) { 2894 for (i=0; i<A->cmap->n; i++) { 2895 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2896 } 2897 } 2898 2899 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2900 if (idx) { 2901 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2902 } 2903 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2904 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2905 2906 for (i=0; i<A->rmap->n; i++) { 2907 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2908 va[i] = vb[i]; 2909 if (idx) idx[i] = a->garray[idxb[i]]; 2910 } 2911 } 2912 2913 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2914 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2915 ierr = PetscFree(idxb);CHKERRQ(ierr); 2916 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2917 PetscFunctionReturn(0); 2918 } 2919 2920 #undef __FUNCT__ 2921 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2922 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2923 { 2924 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2925 PetscInt n = A->rmap->n; 2926 PetscInt cstart = A->cmap->rstart; 2927 PetscInt *cmap = mat->garray; 2928 PetscInt *diagIdx, *offdiagIdx; 2929 Vec diagV, offdiagV; 2930 PetscScalar *a, *diagA, *offdiagA; 2931 PetscInt r; 2932 PetscErrorCode ierr; 2933 2934 PetscFunctionBegin; 2935 ierr = 
PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2936 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2937 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2938 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2939 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2940 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2941 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2942 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2943 for (r = 0; r < n; ++r) { 2944 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2945 a[r] = diagA[r]; 2946 idx[r] = cstart + diagIdx[r]; 2947 } else { 2948 a[r] = offdiagA[r]; 2949 idx[r] = cmap[offdiagIdx[r]]; 2950 } 2951 } 2952 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2953 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2954 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2955 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2956 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2957 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2958 PetscFunctionReturn(0); 2959 } 2960 2961 #undef __FUNCT__ 2962 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2963 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2964 { 2965 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2966 PetscInt n = A->rmap->n; 2967 PetscInt cstart = A->cmap->rstart; 2968 PetscInt *cmap = mat->garray; 2969 PetscInt *diagIdx, *offdiagIdx; 2970 Vec diagV, offdiagV; 2971 PetscScalar *a, *diagA, *offdiagA; 2972 PetscInt r; 2973 PetscErrorCode ierr; 2974 2975 PetscFunctionBegin; 2976 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2977 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2978 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2979 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2980 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2981 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2982 ierr = 
VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  /* per row keep whichever candidate has the larger absolute value; diagonal
     indices shift by cstart, off-diagonal indices map through garray */
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
/* Gathers the nonzero structure (no values) of the whole parallel matrix into
   a sequential matrix via MatGetSubMatrix_MPIAIJ_All; takes ownership of the
   single entry of the returned array and frees the array itself. */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
/* Delegates to the local diagonal block a->A; the inverted block diagonal of
   an MPIAIJ matrix is entirely local. */
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIAIJ"
/* Fills both local blocks with random values, then reassembles the matrix. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr =
MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIAIJ; entries are positional (slot numbers
   in the comments), 0 means "not implemented" for that operation. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
#if defined(PETSC_HAVE_PBGL)
                                       MatSolve_MPIAIJ,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
#if defined(PETSC_HAVE_PBGL)
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
#if defined(PETSC_HAVE_PBGL)
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatGetSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       0,
                                       0,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatGetSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       MatSetColoring_MPIAIJ,
                                       0,
                                       MatSetValuesAdifor_MPIAIJ,
                                /*75*/ MatFDColoringApply_AIJ,
                                       0,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       MatGetRedundantMatrix_MPIAIJ,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       0,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatGetSubMatricesParallel_MPIAIJ,
                                /*129*/0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                /*134*/0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*139*/0,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ
};

/* ----------------------------------------------------------------------------------------*/

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/* Snapshots the numerical values of both local blocks (see MatStoreValues). */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/* Restores the values snapshotted by MatStoreValues_MPIAIJ into both blocks. */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
/*
   Preallocates the two local SEQAIJ blocks of an MPIAIJ matrix:
   b->A (diagonal block, local-rows x local-cols) gets d_nz/d_nnz and
   b->B (off-diagonal block, local-rows x global-cols until compression at
   assembly) gets o_nz/o_nnz.  Creates the blocks on first call.
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* layouts must be finalized before local sizes below can be read */
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    /* off-diagonal block spans all global columns until assembly compresses it */
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   Duplicates an MPIAIJ matrix: creates a new Mat of the same type/sizes on
   matin's communicator, copies the parallel bookkeeping (colmap, garray,
   lvec, Mvctx) and duplicates both local blocks, copying values per cpvalues.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* copy the full operations table so subclass overrides carry over */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->rmap->bs     = matin->rmap->bs;
  mat->cmap->bs     = matin->cmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* share the layouts by reference rather than rebuilding them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}



#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   Loads an MPIAIJ matrix from a PETSc binary viewer.  Rank 0 reads the header,
   row lengths, column indices and values, and ships each other rank its share
   with MPIULong_Send; the matrix is then assembled with MatSetValues_MPIAIJ.
*/
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners,sizesset=1;
  int            fd;
  PetscInt       bs = 1;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    /* only rank 0 touches the file */
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  /* sizesset records whether the caller preset any of the matrix dimensions */
  if (newMat->rmap->n < 0 &&
newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;

  /* rank 0 read the header; broadcast M, N, total-nz to everyone */
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
  if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (sizesset) {
    ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax, rowners[i]);
    }
  } else mmax = -1; /* unused, but compilers complain */

  /* turn per-process counts into an ownership-range prefix sum
     (rowners[1] already holds rank 0's count, so accumulation starts at 2) */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* ourlens temporarily becomes diagonal-only counts for preallocation */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  if (!sizesset) {
    ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
  }

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  /* restore ourlens to full row lengths for the insertion loop below */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj =
rstart; 3520 smycols = mycols; 3521 svals = vals; 3522 for (i=0; i<m; i++) { 3523 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3524 smycols += ourlens[i]; 3525 svals += ourlens[i]; 3526 jj++; 3527 } 3528 } 3529 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3530 ierr = PetscFree(vals);CHKERRQ(ierr); 3531 ierr = PetscFree(mycols);CHKERRQ(ierr); 3532 ierr = PetscFree(rowners);CHKERRQ(ierr); 3533 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3534 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3535 PetscFunctionReturn(0); 3536 } 3537 3538 #undef __FUNCT__ 3539 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3540 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3541 { 3542 PetscErrorCode ierr; 3543 IS iscol_local; 3544 PetscInt csize; 3545 3546 PetscFunctionBegin; 3547 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3548 if (call == MAT_REUSE_MATRIX) { 3549 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3550 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3551 } else { 3552 PetscInt cbs; 3553 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3554 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3555 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3556 } 3557 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3558 if (call == MAT_INITIAL_MATRIX) { 3559 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3560 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3561 } 3562 PetscFunctionReturn(0); 3563 } 3564 3565 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3566 #undef __FUNCT__ 3567 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3568 /* 3569 Not 
great since it makes two copies of the submatrix, first an SeqAIJ 3570 in local and then by concatenating the local matrices the end result. 3571 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3572 3573 Note: This requires a sequential iscol with all indices. 3574 */ 3575 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3576 { 3577 PetscErrorCode ierr; 3578 PetscMPIInt rank,size; 3579 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3580 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3581 PetscBool allcolumns, colflag; 3582 Mat M,Mreuse; 3583 MatScalar *vwork,*aa; 3584 MPI_Comm comm; 3585 Mat_SeqAIJ *aij; 3586 3587 PetscFunctionBegin; 3588 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3589 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3590 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3591 3592 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3593 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3594 if (colflag && ncol == mat->cmap->N) { 3595 allcolumns = PETSC_TRUE; 3596 } else { 3597 allcolumns = PETSC_FALSE; 3598 } 3599 if (call == MAT_REUSE_MATRIX) { 3600 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3601 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3602 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3603 } else { 3604 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3605 } 3606 3607 /* 3608 m - number of local rows 3609 n - number of columns (same on all processors) 3610 rstart - first row in new global matrix generated 3611 */ 3612 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3613 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3614 if (call == MAT_INITIAL_MATRIX) { 3615 
aij = (Mat_SeqAIJ*)(Mreuse)->data; 3616 ii = aij->i; 3617 jj = aij->j; 3618 3619 /* 3620 Determine the number of non-zeros in the diagonal and off-diagonal 3621 portions of the matrix in order to do correct preallocation 3622 */ 3623 3624 /* first get start and end of "diagonal" columns */ 3625 if (csize == PETSC_DECIDE) { 3626 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3627 if (mglobal == n) { /* square matrix */ 3628 nlocal = m; 3629 } else { 3630 nlocal = n/size + ((n % size) > rank); 3631 } 3632 } else { 3633 nlocal = csize; 3634 } 3635 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3636 rstart = rend - nlocal; 3637 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3638 3639 /* next, compute all the lengths */ 3640 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3641 olens = dlens + m; 3642 for (i=0; i<m; i++) { 3643 jend = ii[i+1] - ii[i]; 3644 olen = 0; 3645 dlen = 0; 3646 for (j=0; j<jend; j++) { 3647 if (*jj < rstart || *jj >= rend) olen++; 3648 else dlen++; 3649 jj++; 3650 } 3651 olens[i] = olen; 3652 dlens[i] = dlen; 3653 } 3654 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3655 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3656 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3657 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3658 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3659 ierr = PetscFree(dlens);CHKERRQ(ierr); 3660 } else { 3661 PetscInt ml,nl; 3662 3663 M = *newmat; 3664 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3665 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3666 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3667 /* 3668 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3669 rather than the slower MatSetValues(). 
3670 */ 3671 M->was_assembled = PETSC_TRUE; 3672 M->assembled = PETSC_FALSE; 3673 } 3674 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3675 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3676 ii = aij->i; 3677 jj = aij->j; 3678 aa = aij->a; 3679 for (i=0; i<m; i++) { 3680 row = rstart + i; 3681 nz = ii[i+1] - ii[i]; 3682 cwork = jj; jj += nz; 3683 vwork = aa; aa += nz; 3684 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3685 } 3686 3687 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3688 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3689 *newmat = M; 3690 3691 /* save submatrix used in processor for next request */ 3692 if (call == MAT_INITIAL_MATRIX) { 3693 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3694 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3695 } 3696 PetscFunctionReturn(0); 3697 } 3698 3699 #undef __FUNCT__ 3700 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3701 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3702 { 3703 PetscInt m,cstart, cend,j,nnz,i,d; 3704 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3705 const PetscInt *JJ; 3706 PetscScalar *values; 3707 PetscErrorCode ierr; 3708 3709 PetscFunctionBegin; 3710 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3711 3712 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3713 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3714 m = B->rmap->n; 3715 cstart = B->cmap->rstart; 3716 cend = B->cmap->rend; 3717 rstart = B->rmap->rstart; 3718 3719 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3720 3721 #if defined(PETSC_USE_DEBUGGING) 3722 for (i=0; i<m; i++) { 3723 nnz = Ii[i+1]- Ii[i]; 3724 JJ = J + Ii[i]; 3725 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3726 if (nnz && (JJ[0] < 0)) 
SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3727 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3728 } 3729 #endif 3730 3731 for (i=0; i<m; i++) { 3732 nnz = Ii[i+1]- Ii[i]; 3733 JJ = J + Ii[i]; 3734 nnz_max = PetscMax(nnz_max,nnz); 3735 d = 0; 3736 for (j=0; j<nnz; j++) { 3737 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3738 } 3739 d_nnz[i] = d; 3740 o_nnz[i] = nnz - d; 3741 } 3742 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3743 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3744 3745 if (v) values = (PetscScalar*)v; 3746 else { 3747 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3748 } 3749 3750 for (i=0; i<m; i++) { 3751 ii = i + rstart; 3752 nnz = Ii[i+1]- Ii[i]; 3753 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3754 } 3755 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3756 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3757 3758 if (!v) { 3759 ierr = PetscFree(values);CHKERRQ(ierr); 3760 } 3761 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3762 PetscFunctionReturn(0); 3763 } 3764 3765 #undef __FUNCT__ 3766 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3767 /*@ 3768 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3769 (the default parallel PETSc format). 3770 3771 Collective on MPI_Comm 3772 3773 Input Parameters: 3774 + B - the matrix 3775 . i - the indices into j for the start of each local row (starts with zero) 3776 . 
j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

     The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatches to MatMPIAIJSetPreallocationCSR_MPIAIJ when B is an MPIAIJ matrix;
     silently does nothing for other types (PetscTryMethod semantics) */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).
For good matrix assembly performance 3826 the user should preallocate the matrix storage by setting the parameters 3827 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3828 performance can be increased by more than a factor of 50. 3829 3830 Collective on MPI_Comm 3831 3832 Input Parameters: 3833 + A - the matrix 3834 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3835 (same value is used for all local rows) 3836 . d_nnz - array containing the number of nonzeros in the various rows of the 3837 DIAGONAL portion of the local submatrix (possibly different for each row) 3838 or NULL, if d_nz is used to specify the nonzero structure. 3839 The size of this array is equal to the number of local rows, i.e 'm'. 3840 For matrices that will be factored, you must leave room for (and set) 3841 the diagonal entry even if it is zero. 3842 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3843 submatrix (same value is used for all local rows). 3844 - o_nnz - array containing the number of nonzeros in the various rows of the 3845 OFF-DIAGONAL portion of the local submatrix (possibly different for 3846 each row) or NULL, if o_nz is used to specify the nonzero 3847 structure. The size of this array is equal to the number 3848 of local rows, i.e 'm'. 3849 3850 If the *_nnz parameter is given then the *_nz parameter is ignored 3851 3852 The AIJ format (also called the Yale sparse matrix format or 3853 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3854 storage. The stored row and column indices begin with zero. 3855 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3856 3857 The parallel matrix is partitioned such that the first m0 rows belong to 3858 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3859 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extraction the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an m x n matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence pre-allocation is perfect.
Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* runtime dispatch: only acts when B is an MPIAIJ(-derived) matrix */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the CSR row-pointer array must begin at 0, and a concrete local row count is required */
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJ"
/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).
For good matrix assembly performance 4041 the user should preallocate the matrix storage by setting the parameters 4042 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4043 performance can be increased by more than a factor of 50. 4044 4045 Collective on MPI_Comm 4046 4047 Input Parameters: 4048 + comm - MPI communicator 4049 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4050 This value should be the same as the local size used in creating the 4051 y vector for the matrix-vector product y = Ax. 4052 . n - This value should be the same as the local size used in creating the 4053 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4054 calculated if N is given) For square matrices n is almost always m. 4055 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4056 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4057 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4058 (same value is used for all local rows) 4059 . d_nnz - array containing the number of nonzeros in the various rows of the 4060 DIAGONAL portion of the local submatrix (possibly different for each row) 4061 or NULL, if d_nz is used to specify the nonzero structure. 4062 The size of this array is equal to the number of local rows, i.e 'm'. 4063 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4064 submatrix (same value is used for all local rows). 4065 - o_nnz - array containing the number of nonzeros in the various rows of the 4066 OFF-DIAGONAL portion of the local submatrix (possibly different for 4067 each row) or NULL, if o_nz is used to specify the nonzero 4068 structure. The size of this array is equal to the number 4069 of local rows, i.e 'm'. 4070 4071 Output Parameter: 4072 . 
A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitute the OFF-DIAGONAL portion. The example below better
   illustrates this concept.
4109 4110 For a square global matrix we define each processor's diagonal portion 4111 to be its local rows and the corresponding columns (a square submatrix); 4112 each processor's off-diagonal portion encompasses the remainder of the 4113 local matrix (a rectangular submatrix). 4114 4115 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4116 4117 When calling this routine with a single process communicator, a matrix of 4118 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4119 type of communicator, use the construction mechanism: 4120 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4121 4122 By default, this format uses inodes (identical nodes) when possible. 4123 We search for consecutive rows with the same nonzero structure, thereby 4124 reusing matrix information to achieve increased efficiency. 4125 4126 Options Database Keys: 4127 + -mat_no_inode - Do not use inodes 4128 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4129 - -mat_aij_oneindex - Internally use indexing starting at 1 4130 rather than 0. Note that when calling MatSetValues(), 4131 the user still MUST index entries starting at 0! 4132 4133 4134 Example usage: 4135 4136 Consider the following 8x8 matrix with 34 non-zero values, that is 4137 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4138 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    /* parallel communicator: MPIAIJ with diagonal/off-diagonal preallocation */
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    /* single process: SEQAIJ; o_nz/o_nnz play no role */
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
/* Returns the diagonal (Ad) and off-diagonal (Ao) SeqAIJ parts of an MPIAIJ
   matrix, plus garray: the global column index of each compressed column of Ao. */
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
PetscErrorCode
MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather colors of all global
       columns, then map them through garray onto B's compressed columns */
    ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: translate the owned global column
       numbers into the local (ghosted) numbering before reading colors */
    ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr =
PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
/* Forwards ADIFOR-computed values to both the diagonal and off-diagonal
   SeqAIJ parts of the MPIAIJ matrix. */
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
/* Symbolic phase of concatenating the per-process SeqAIJ matrices into one
   MPIAIJ matrix: computes the preallocation and creates the empty result. */
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
  PetscInt       *indx;

  PetscFunctionBegin;
  /* This routine will ONLY return MPIAIJ type matrix */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  if (n == PETSC_DECIDE) {
    ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  }
  /* Check sum(n) = N */
  ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);

  /* local matrices are stacked in rank order: this rank's first global row
     comes after the rows of all lower ranks */
  ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  rstart -= m;

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
  }

  ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
/* Numeric phase: copies the values of each process's SeqAIJ rows into the
   already-preallocated MPIAIJ matrix and assembles it. */
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
/*@
      MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
                 matrices
from each processor

   Collective on MPI_Comm

   Input Parameters:
+    comm - the communicators the parallel matrix will live on
.    inmat - the input sequential matrices
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.

@*/
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  if (size == 1) {
    /* uniprocessor: the result is just a copy of the input matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
  } else {
    /* symbolic phase only on first creation; numeric phase always runs */
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
    }
    ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
/* Writes each process's local rows of A as a sequential matrix to the
   binary file "<outfile>.<rank>". */
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  /* NOTE(review): len+5 leaves room for "." plus at most a 3-digit rank and
     the NUL terminator; ranks >= 1000 would overflow this buffer -- confirm */
  ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
/* Destructor for MPIAIJ matrices built by MatCreateMPIAIJSumSeqAIJ(): frees
   the attached Mat_Merge_SeqsToMPI support data, then runs the base destroy. */
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): ships this process's seqmat
   values for rows owned by other processes, then sums local and received
   values row by row into the symbolically preallocated mpimat. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* the merge support structure was attached by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    /* bj_i is a (sorted) superset of aj: scan j forward until columns match */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
/* Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): determines row ownership,
   exchanges the ij-structure of off-process rows, merges it with the local
   structure, and creates the preallocated parallel matrix.  The merge data
   needed by the numeric phase is attached to the result in a container. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(&merge);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* only rows with nonzeros are sent; the i-structure message holds a
         (row index, offset) pair per such row plus a header, hence 2*(nrows+1) */
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }
  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++) {
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);

  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
                 matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+    comm - the communicators the parallel matrix will live on
.    seqmat - the input sequential matrices
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    /* uniprocessor: nothing to merge, just copy the input */
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  /* symbolic phase only on first creation; numeric phase always runs */
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by
taking all its local rows and putting them into a sequential vector with
          mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    /* each merged row holds its diagonal and off-diagonal entries,
       assembled in ascending global column order */
    ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A: columns left of the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* nonzero structure already exists: only refresh the numerical values */
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    /* default row IS: all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column IS: every global column with a local nonzero, in
       ascending order: off-diag cols < cstart, owned cols, off-diag cols > cstart */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  /* A*B requires A's column layout to match B's row layout */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* Needed rows of B = sorted global indices of the nonzero columns of local A */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    /* MatGetSubMatrices() expects a user-provided array of matrices on reuse */
    ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx =a->Mvctx;
  MPI_Comm               comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = NULL,*swaits = NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* The communication pattern of A's matrix-vector scatter tells which remote rows of B
     correspond to nonzero off-diagonal columns of local A; its buffers are borrowed here
     to exchange row lengths. */
  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices; /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  /* without saved communication metadata a reuse is impossible; fall back to a fresh build */
  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    k           = 0;
    for (i=0; i<nsends; i++) {
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    /* reuse the i- and j-structures computed previously; only values are exchanged below */
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
5341 bufa = *bufa_ptr; 5342 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5343 b_otha = b_oth->a; 5344 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5345 5346 /* a-array */ 5347 /*---------*/ 5348 /* post receives of a-array */ 5349 for (i=0; i<nrecvs; i++) { 5350 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5351 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5352 } 5353 5354 /* pack the outgoing message a-array */ 5355 k = 0; 5356 for (i=0; i<nsends; i++) { 5357 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5358 bufA = bufa+sstartsj[i]; 5359 for (j=0; j<nrows; j++) { 5360 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5361 for (ll=0; ll<sbs; ll++) { 5362 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5363 for (l=0; l<ncols; l++) { 5364 *bufA++ = vals[l]; 5365 } 5366 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5367 } 5368 } 5369 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5370 } 5371 /* recvs and sends of a-array are completed */ 5372 i = nrecvs; 5373 while (i--) { 5374 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5375 } 5376 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5377 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5378 5379 if (scall == MAT_INITIAL_MATRIX) { 5380 /* put together the new matrix */ 5381 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5382 5383 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5384 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5385 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5386 b_oth->free_a = PETSC_TRUE; 5387 b_oth->free_ij = PETSC_TRUE; 5388 b_oth->nonew = 0; 5389 5390 ierr = PetscFree(bufj);CHKERRQ(ierr); 5391 if (!startsj_s || !bufa_ptr) { 5392 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5393 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5394 } else { 5395 *startsj_s = sstartsj; 5396 *startsj_r = rstartsj; 5397 *bufa_ptr = bufa; 5398 } 5399 } 5400 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5401 PetscFunctionReturn(0); 5402 } 5403 5404 #undef __FUNCT__ 5405 #define __FUNCT__ "MatGetCommunicationStructs" 5406 /*@C 5407 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5408 5409 Not Collective 5410 5411 Input Parameters: 5412 . A - The matrix in mpiaij format 5413 5414 Output Parameter: 5415 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5416 . colmap - A map from global column index to local index into lvec 5417 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5418 5419 Level: developer 5420 5421 @*/ 5422 #if defined(PETSC_USE_CTABLE) 5423 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5424 #else 5425 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5426 #endif 5427 { 5428 Mat_MPIAIJ *a; 5429 5430 PetscFunctionBegin; 5431 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5432 PetscValidPointer(lvec, 2); 5433 PetscValidPointer(colmap, 3); 5434 PetscValidPointer(multScatter, 4); 5435 a = (Mat_MPIAIJ*) A->data; 5436 if (lvec) *lvec = a->lvec; 5437 if (colmap) *colmap = a->colmap; 5438 if (multScatter) *multScatter = a->Mvctx; 5439 PetscFunctionReturn(0); 5440 } 5441 5442 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5443 PETSC_EXTERN PetscErrorCode 
MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5444 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5445 5446 #undef __FUNCT__ 5447 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5448 /* 5449 Computes (B'*A')' since computing B*A directly is untenable 5450 5451 n p p 5452 ( ) ( ) ( ) 5453 m ( A ) * n ( B ) = m ( C ) 5454 ( ) ( ) ( ) 5455 5456 */ 5457 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5458 { 5459 PetscErrorCode ierr; 5460 Mat At,Bt,Ct; 5461 5462 PetscFunctionBegin; 5463 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5464 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5465 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5466 ierr = MatDestroy(&At);CHKERRQ(ierr); 5467 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5468 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5469 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5470 PetscFunctionReturn(0); 5471 } 5472 5473 #undef __FUNCT__ 5474 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5475 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5476 { 5477 PetscErrorCode ierr; 5478 PetscInt m=A->rmap->n,n=B->cmap->n; 5479 Mat Cmat; 5480 5481 PetscFunctionBegin; 5482 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5483 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5484 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5485 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5486 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5487 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5488 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5489 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5490 5491 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5492 5493 *C = 
Cmat; 5494 PetscFunctionReturn(0); 5495 } 5496 5497 /* ----------------------------------------------------------------*/ 5498 #undef __FUNCT__ 5499 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5500 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5501 { 5502 PetscErrorCode ierr; 5503 5504 PetscFunctionBegin; 5505 if (scall == MAT_INITIAL_MATRIX) { 5506 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5507 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5508 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5509 } 5510 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5511 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5512 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5513 PetscFunctionReturn(0); 5514 } 5515 5516 #if defined(PETSC_HAVE_MUMPS) 5517 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5518 #endif 5519 #if defined(PETSC_HAVE_PASTIX) 5520 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5521 #endif 5522 #if defined(PETSC_HAVE_SUPERLU_DIST) 5523 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5524 #endif 5525 #if defined(PETSC_HAVE_CLIQUE) 5526 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5527 #endif 5528 5529 /*MC 5530 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5531 5532 Options Database Keys: 5533 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateAIJ()
M*/

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  /* install the MPIAIJ function table (declared earlier in this file) */
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  /* register external-package factorizations and type-specific methods by name */
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CLIQUE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
       and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array.
Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5652 communication if it is known that only local entries will be set. 5653 5654 .keywords: matrix, aij, compressed row, sparse, parallel 5655 5656 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5657 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5658 @*/ 5659 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5660 { 5661 PetscErrorCode ierr; 5662 Mat_MPIAIJ *maij; 5663 5664 PetscFunctionBegin; 5665 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5666 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5667 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5668 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5669 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5670 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5671 maij = (Mat_MPIAIJ*) (*mat)->data; 5672 5673 (*mat)->preallocated = PETSC_TRUE; 5674 5675 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5676 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5677 5678 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5679 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5680 5681 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5682 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5683 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5684 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5685 5686 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5687 ierr = 
MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5688 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5689 PetscFunctionReturn(0); 5690 } 5691 5692 /* 5693 Special version for direct calls from Fortran 5694 */ 5695 #include <petsc-private/fortranimpl.h> 5696 5697 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5698 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5699 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5700 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5701 #endif 5702 5703 /* Change these macros so can be used in void function */ 5704 #undef CHKERRQ 5705 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5706 #undef SETERRQ2 5707 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5708 #undef SETERRQ3 5709 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5710 #undef SETERRQ 5711 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5712 5713 #undef __FUNCT__ 5714 #define __FUNCT__ "matsetvaluesmpiaij_" 5715 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5716 { 5717 Mat mat = *mmat; 5718 PetscInt m = *mm, n = *mn; 5719 InsertMode addv = *maddv; 5720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5721 PetscScalar value; 5722 PetscErrorCode ierr; 5723 5724 MatCheckPreallocated(mat,1); 5725 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5726 5727 #if defined(PETSC_USE_DEBUG) 5728 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5729 #endif 5730 { 5731 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5732 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5733 PetscBool roworiented = aij->roworiented; 5734 5735 /* Some Variables required in the macro */ 5736 Mat A = aij->A; 5737 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5738 PetscInt *aimax = a->imax,*ai = 
a->i,*ailen = a->ilen,*aj = a->j; 5739 MatScalar *aa = a->a; 5740 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5741 Mat B = aij->B; 5742 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5743 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5744 MatScalar *ba = b->a; 5745 5746 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5747 PetscInt nonew = a->nonew; 5748 MatScalar *ap1,*ap2; 5749 5750 PetscFunctionBegin; 5751 for (i=0; i<m; i++) { 5752 if (im[i] < 0) continue; 5753 #if defined(PETSC_USE_DEBUG) 5754 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5755 #endif 5756 if (im[i] >= rstart && im[i] < rend) { 5757 row = im[i] - rstart; 5758 lastcol1 = -1; 5759 rp1 = aj + ai[row]; 5760 ap1 = aa + ai[row]; 5761 rmax1 = aimax[row]; 5762 nrow1 = ailen[row]; 5763 low1 = 0; 5764 high1 = nrow1; 5765 lastcol2 = -1; 5766 rp2 = bj + bi[row]; 5767 ap2 = ba + bi[row]; 5768 rmax2 = bimax[row]; 5769 nrow2 = bilen[row]; 5770 low2 = 0; 5771 high2 = nrow2; 5772 5773 for (j=0; j<n; j++) { 5774 if (roworiented) value = v[i*n+j]; 5775 else value = v[i+j*m]; 5776 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5777 if (in[j] >= cstart && in[j] < cend) { 5778 col = in[j] - cstart; 5779 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5780 } else if (in[j] < 0) continue; 5781 #if defined(PETSC_USE_DEBUG) 5782 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5783 #endif 5784 else { 5785 if (mat->was_assembled) { 5786 if (!aij->colmap) { 5787 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5788 } 5789 #if defined(PETSC_USE_CTABLE) 5790 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5791 col--; 5792 #else 5793 col = 
aij->colmap[in[j]] - 1; 5794 #endif 5795 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5796 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5797 col = in[j]; 5798 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5799 B = aij->B; 5800 b = (Mat_SeqAIJ*)B->data; 5801 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5802 rp2 = bj + bi[row]; 5803 ap2 = ba + bi[row]; 5804 rmax2 = bimax[row]; 5805 nrow2 = bilen[row]; 5806 low2 = 0; 5807 high2 = nrow2; 5808 bm = aij->B->rmap->n; 5809 ba = b->a; 5810 } 5811 } else col = in[j]; 5812 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5813 } 5814 } 5815 } else if (!aij->donotstash) { 5816 if (roworiented) { 5817 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5818 } else { 5819 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5820 } 5821 } 5822 } 5823 } 5824 PetscFunctionReturnVoid(); 5825 } 5826 5827