#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inode routines when enough inodes exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
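
/*
   Illustrative sketch (not part of the original source): creating a MATAIJ
   matrix the way the manpage above recommends, calling both preallocation
   routines so the same code runs with one process or many.  The function
   name and the preallocation guesses (5 diagonal and 2 off-diagonal nonzeros
   per row) are assumptions of the example only.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleCreateAIJ"
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            A;

  PetscFunctionBegin;
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
  /* only the call that matches the actual type takes effect; the other is ignored */
  ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}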

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
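
/*
   Illustrative sketch (not part of the original source): the public
   MatGetColumnNorms() interface that dispatches to the routine above.
   Every process supplies an array sized for all global columns, and every
   process receives the same, globally reduced, result.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleColumnNorms"
static PetscErrorCode ExampleColumnNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       N;
  PetscReal      *norms;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
  ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
  ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr); /* NORM_1 and NORM_INFINITY also work */
  ierr = PetscFree(norms);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}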

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine numbers of diagonal and off-diagonal entries */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine numbers of diagonal and off-diagonal entries */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
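
/*
   Illustrative sketch (not part of the original source): how a caller might
   drive MatDistribute_MPIAIJ().  gmat is a square SeqAIJ matrix whose entries
   live on process 0, and mlocal is the number of rows each process is to
   receive; the reuse call assumes the nonzero pattern of gmat is unchanged.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleDistribute"
static PetscErrorCode ExampleDistribute(MPI_Comm comm,Mat gmat,PetscInt mlocal,Mat *dmat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,dmat);CHKERRQ(ierr);
  /* later, after only the numerical values of gmat have changed */
  ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,dmat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}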

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  process has an order-N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
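
/*
   Illustrative sketch (not part of the original source): looking up the local
   off-diagonal column for a global column gcol with the colmap built above.
   The +1/-1 shifts mirror the routine: 0 marks "not present" in both the
   table and the array representation, so a negative result means gcol does
   not occur in the off-diagonal part.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleColmapLookup"
static PetscErrorCode ExampleColmapLookup(Mat mat,PetscInt gcol,PetscInt *lcol)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!aij->colmap) {
    ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;
#else
  *lcol = aij->colmap[gcol] - 1;
#endif
  PetscFunctionReturn(0);
}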

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
a_noinsert: ; \
    ailen[row] = nrow1; \
  }


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
b_noinsert: ; \
    bilen[row] = nrow2; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
        } else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
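
/*
   Illustrative sketch (not part of the original source): the caller-side
   pattern the routine above serves.  The indices are global; entries whose
   rows are owned by other processes are legal and are stashed until
   MatAssemblyBegin/End() moves them to their owners.  The sample indices and
   values are assumptions of the example.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleSetValues"
static PetscErrorCode ExampleSetValues(Mat mat)
{
  PetscErrorCode ierr;
  PetscInt       row     = 0,cols[2] = {0,1};
  PetscScalar    vals[2] = {2.0,-1.0};

  PetscFunctionBegin;
  ierr = MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}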

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processes are in the same mode, either INSERT_VALUES or ADD_VALUES */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt       *owners = A->rmap->range;
  PetscInt       n       = A->rmap->n;
  PetscMPIInt    size    = mat->size;
  PetscSF        sf;
  PetscInt       *lrows;
  PetscSFNode    *rrows;
  PetscInt       lastidx = -1, r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %D not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
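
/*
   Illustrative sketch (not part of the original source): the usual Dirichlet
   boundary-condition use of the routine above through the public interface.
   rows[] holds global row numbers (any process may list any row); with x
   holding the boundary values, b is also fixed so that b = diag*x on the
   zeroed rows.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleZeroRows"
static PetscErrorCode ExampleZeroRows(Mat A,PetscInt nrows,const PetscInt rows[],Vec x,Vec b)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}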

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %D not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
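
/*
   Illustrative sketch (not part of the original source): what the routine
   above computes, y = A_diag*x_local + B_offdiag*x_ghost, where the scatter
   that fills the ghost values runs between VecScatterBegin() and
   VecScatterEnd() and therefore overlaps the diagonal-block multiply.
   Callers simply use MatMult():
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleMult"
static PetscErrorCode ExampleMult(Mat A)
{
  PetscErrorCode ierr;
  Vec            x,y;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr); /* x matches the column layout, y the row layout */
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);      /* dispatches to MatMult_MPIAIJ for MATMPIAIJ */
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}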

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we still need to call this no-op End() */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
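
/*
   Illustrative sketch (not part of the original source): querying the test
   above through the public interface; tol bounds the elementwise difference
   tolerated between A and the transpose of B.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleIsTranspose"
static PetscErrorCode ExampleIsTranspose(Mat A,Mat B)
{
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = MatIsTranspose(A,B,1.e-12,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"B is the transpose of A\n");CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}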

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
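
/*
   Illustrative sketch (not part of the original source): the viewer setup
   that reaches MatView_MPIAIJ_Binary() above when the communicator has more
   than one process; the file name is an assumption of the example.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleViewBinary"
static PetscErrorCode ExampleViewBinary(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}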

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscBool flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
      if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
    }

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII() */
      PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1385 ct = cols; 1386 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1387 for (i=0; i<m; i++) { 1388 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1389 row++; 1390 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1391 } 1392 ierr = PetscFree(ct);CHKERRQ(ierr); 1393 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1394 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1395 /* 1396 Everyone has to call to draw the matrix since the graphics waits are 1397 synchronized across all processors that share the PetscDraw object 1398 */ 1399 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1400 if (!rank) { 1401 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1402 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII() */ 1403 ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr); 1404 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1405 } 1406 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1407 ierr = MatDestroy(&A);CHKERRQ(ierr); 1408 } 1409 PetscFunctionReturn(0); 1410 } 1411 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatView_MPIAIJ" 1414 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1415 { 1416 PetscErrorCode ierr; 1417 PetscBool iascii,isdraw,issocket,isbinary; 1418 1419 PetscFunctionBegin; 1420 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1421 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1422 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1423 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1424 if (iascii || isdraw || isbinary || issocket) { 1425 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1426 } 1427 PetscFunctionReturn(0); 1428 } 1429 1430 #undef __FUNCT__ 1431 #define __FUNCT__ "MatSOR_MPIAIJ" 1432 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1433 { 1434 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1435 PetscErrorCode ierr; 1436 Vec bb1 = 0; 1437 PetscBool hasop; 1438 1439 PetscFunctionBegin; 1440 if (flag == SOR_APPLY_UPPER) { 1441 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1442 PetscFunctionReturn(0); 1443 } 1444 1445 if (its > 1 || (~flag & SOR_ZERO_INITIAL_GUESS) || (flag & SOR_EISENSTAT)) { 1446 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1447 } 1448 1449 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1450 if (flag & SOR_ZERO_INITIAL_GUESS) { 1451 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1452 its--; 1453 } 1454 1455 while (its--) { 1456 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1457 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1458 1459 /* update rhs: bb1 = bb - B*x */ 1460 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1461 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1462 1463 /* local sweep */ 1464 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1465 } 1466 } else if (flag &
SOR_LOCAL_FORWARD_SWEEP) { 1467 if (flag & SOR_ZERO_INITIAL_GUESS) { 1468 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1469 its--; 1470 } 1471 while (its--) { 1472 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1473 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1474 1475 /* update rhs: bb1 = bb - B*x */ 1476 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1477 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1478 1479 /* local sweep */ 1480 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1481 } 1482 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1483 if (flag & SOR_ZERO_INITIAL_GUESS) { 1484 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1485 its--; 1486 } 1487 while (its--) { 1488 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1489 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1490 1491 /* update rhs: bb1 = bb - B*x */ 1492 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1493 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1494 1495 /* local sweep */ 1496 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1497 } 1498 } else if (flag & SOR_EISENSTAT) { 1499 Vec xx1; 1500 1501 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1502 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1503 1504 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1505 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1506 if (!mat->diag) { 1507 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1508 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1509 } 1510 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1511 if (hasop) { 1512 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1513 } else { 1514 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1515 } 1516 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1517 1518 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1519 1520 /* local sweep */ 1521 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1522 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1523 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1524 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1525 1526 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatPermute_MPIAIJ" 1532 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1533 { 1534 Mat aA,aB,Aperm; 1535 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1536 PetscScalar *aa,*ba; 1537 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1538 PetscSF rowsf,sf; 1539 IS parcolp = NULL; 1540 PetscBool done; 1541 PetscErrorCode ierr; 1542 1543 PetscFunctionBegin; 1544 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1545 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1546 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1547 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1548 1549 
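/* rowp and colp list, for each new position, the global index that moves there (that is how the comments below read them). The two PetscSF reductions invert those maps in parallel: every process scatters the global indices of the positions it owns to the owners named by the permutation, so rdest[i] and cdest[i] end up holding the destination of local row/column i. A sketch with assumed data: two ranks owning rows {0,1} and {2,3} and rowp = {2,3,0,1} leave rank 0 with rdest = {2,3} and rank 1 with rdest = {0,1}; each process now knows the new home of every row it owns. */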
/* Invert row permutation to find out where my rows should go */ 1550 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1551 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1552 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1553 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1554 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1555 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1556 1557 /* Invert column permutation to find out where my columns should go */ 1558 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1559 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1560 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1561 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1562 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1563 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1564 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1565 1566 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1567 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1568 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1569 1570 /* Find out where my gcols should go */ 1571 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1572 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1573 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1574 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1575 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1576 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1577 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1578 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1579 1580 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1581 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1582 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1583 for (i=0; i<m; i++) { 1584 PetscInt row = rdest[i],rowner; 1585 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1586 for (j=ai[i]; j<ai[i+1]; j++) { 1587 PetscInt cowner,col = cdest[aj[j]]; 1588 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1589 if (rowner == cowner) dnnz[i]++; 1590 else onnz[i]++; 1591 } 1592 for (j=bi[i]; j<bi[i+1]; j++) { 1593 PetscInt cowner,col = gcdest[bj[j]]; 1594 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1595 if (rowner == cowner) dnnz[i]++; 1596 else onnz[i]++; 1597 } 1598 } 1599 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1600 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1601 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1602 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1603 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1604 1605 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1606 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1607 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1608 for (i=0; i<m; i++) { 1609 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1610 PetscInt j0,rowlen; 1611 rowlen = ai[i+1] - ai[i]; 1612 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be 
larger than number of rows m, so sum in batches */ 1613 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1614 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1615 } 1616 rowlen = bi[i+1] - bi[i]; 1617 for (j0=j=0; j<rowlen; j0=j) { 1618 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1619 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1620 } 1621 } 1622 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1623 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1624 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1625 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1626 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1627 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1628 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1629 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1630 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1631 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1632 *B = Aperm; 1633 PetscFunctionReturn(0); 1634 } 1635 1636 #undef __FUNCT__ 1637 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1638 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1639 { 1640 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1641 Mat A = mat->A,B = mat->B; 1642 PetscErrorCode ierr; 1643 PetscReal isend[5],irecv[5]; 1644 1645 PetscFunctionBegin; 1646 info->block_size = 1.0; 1647 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1648 1649 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1650 isend[3] = info->memory; isend[4] = info->mallocs; 1651 1652 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1653 1654 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1655 isend[3] += info->memory; isend[4] += info->mallocs; 1656 if (flag == MAT_LOCAL) { 1657 info->nz_used = isend[0]; 1658 info->nz_allocated = isend[1]; 1659 info->nz_unneeded = isend[2]; 1660 info->memory = isend[3]; 1661 info->mallocs = isend[4]; 1662 } else if (flag == MAT_GLOBAL_MAX) { 1663 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1664 1665 info->nz_used = irecv[0]; 1666 info->nz_allocated = irecv[1]; 1667 info->nz_unneeded = irecv[2]; 1668 info->memory = irecv[3]; 1669 info->mallocs = irecv[4]; 1670 } else if (flag == MAT_GLOBAL_SUM) { 1671 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1672 1673 info->nz_used = irecv[0]; 1674 info->nz_allocated = irecv[1]; 1675 info->nz_unneeded = irecv[2]; 1676 info->memory = irecv[3]; 1677 info->mallocs = irecv[4]; 1678 } 1679 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1680 info->fill_ratio_needed = 0; 1681 info->factor_mallocs = 0; 1682 PetscFunctionReturn(0); 1683 } 1684 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatSetOption_MPIAIJ" 1687 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1688 { 1689 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1690 PetscErrorCode ierr; 1691 1692 PetscFunctionBegin; 1693 switch (op) { 1694 case MAT_NEW_NONZERO_LOCATIONS: 1695 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1696 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1697 case MAT_KEEP_NONZERO_PATTERN: 1698 case MAT_NEW_NONZERO_LOCATION_ERR: 1699 case MAT_USE_INODES: 1700 case MAT_IGNORE_ZERO_ENTRIES: 1701 MatCheckPreallocated(A,1); 
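/* These options only affect how the local blocks store and assemble their entries, so they are forwarded verbatim to both the diagonal part A and the off-diagonal part B. */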
1702 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1703 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1704 break; 1705 case MAT_ROW_ORIENTED: 1706 a->roworiented = flg; 1707 1708 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1709 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1710 break; 1711 case MAT_NEW_DIAGONALS: 1712 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1713 break; 1714 case MAT_IGNORE_OFF_PROC_ENTRIES: 1715 a->donotstash = flg; 1716 break; 1717 case MAT_SPD: 1718 A->spd_set = PETSC_TRUE; 1719 A->spd = flg; 1720 if (flg) { 1721 A->symmetric = PETSC_TRUE; 1722 A->structurally_symmetric = PETSC_TRUE; 1723 A->symmetric_set = PETSC_TRUE; 1724 A->structurally_symmetric_set = PETSC_TRUE; 1725 } 1726 break; 1727 case MAT_SYMMETRIC: 1728 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1729 break; 1730 case MAT_STRUCTURALLY_SYMMETRIC: 1731 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1732 break; 1733 case MAT_HERMITIAN: 1734 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1735 break; 1736 case MAT_SYMMETRY_ETERNAL: 1737 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1738 break; 1739 default: 1740 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1741 } 1742 PetscFunctionReturn(0); 1743 } 1744 1745 #undef __FUNCT__ 1746 #define __FUNCT__ "MatGetRow_MPIAIJ" 1747 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1748 { 1749 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1750 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1751 PetscErrorCode ierr; 1752 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1753 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1754 PetscInt *cmap,*idx_p; 1755 1756 PetscFunctionBegin; 1757 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1758 mat->getrowactive = PETSC_TRUE; 1759 1760 if (!mat->rowvalues && (idx || v)) { 1761 /* 1762 allocate enough space to hold information from the longest row. 
1763 */ 1764 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1765 PetscInt max = 1,tmp; 1766 for (i=0; i<matin->rmap->n; i++) { 1767 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1768 if (max < tmp) max = tmp; 1769 } 1770 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1771 } 1772 1773 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1774 lrow = row - rstart; 1775 1776 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1777 if (!v) {pvA = 0; pvB = 0;} 1778 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1779 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1780 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1781 nztot = nzA + nzB; 1782 1783 cmap = mat->garray; 1784 if (v || idx) { 1785 if (nztot) { 1786 /* Sort by increasing column numbers, assuming A and B already sorted */ 1787 PetscInt imark = -1; 1788 if (v) { 1789 *v = v_p = mat->rowvalues; 1790 for (i=0; i<nzB; i++) { 1791 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1792 else break; 1793 } 1794 imark = i; 1795 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1796 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1797 } 1798 if (idx) { 1799 *idx = idx_p = mat->rowindices; 1800 if (imark > -1) { 1801 for (i=0; i<imark; i++) { 1802 idx_p[i] = cmap[cworkB[i]]; 1803 } 1804 } else { 1805 for (i=0; i<nzB; i++) { 1806 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1807 else break; 1808 } 1809 imark = i; 1810 } 1811 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1812 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1813 } 1814 } else { 1815 if (idx) *idx = 0; 1816 if (v) *v = 0; 1817 } 1818 } 1819 *nz = nztot; 1820 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1821 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1822 PetscFunctionReturn(0); 1823 } 1824 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1827 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1828 { 1829 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1830 1831 PetscFunctionBegin; 1832 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1833 aij->getrowactive = PETSC_FALSE; 1834 PetscFunctionReturn(0); 1835 } 1836 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "MatNorm_MPIAIJ" 1839 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1840 { 1841 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1842 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1843 PetscErrorCode ierr; 1844 PetscInt i,j,cstart = mat->cmap->rstart; 1845 PetscReal sum = 0.0; 1846 MatScalar *v; 1847 1848 PetscFunctionBegin; 1849 if (aij->size == 1) { 1850 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1851 } else { 1852 if (type == NORM_FROBENIUS) { 1853 v = amat->a; 1854 for (i=0; i<amat->nz; i++) { 1855 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1856 } 1857 v = bmat->a; 1858 for (i=0; i<bmat->nz; i++) { 1859 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1860 } 1861 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1862 *norm = PetscSqrtReal(*norm); 1863 } else if (type == NORM_1) { /* max column norm */ 1864 PetscReal *tmp,*tmp2; 1865 PetscInt *jj,*garray = aij->garray; 1866 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1867 ierr = 
PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1868 *norm = 0.0; 1869 v = amat->a; jj = amat->j; 1870 for (j=0; j<amat->nz; j++) { 1871 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1872 } 1873 v = bmat->a; jj = bmat->j; 1874 for (j=0; j<bmat->nz; j++) { 1875 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1876 } 1877 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1878 for (j=0; j<mat->cmap->N; j++) { 1879 if (tmp2[j] > *norm) *norm = tmp2[j]; 1880 } 1881 ierr = PetscFree(tmp);CHKERRQ(ierr); 1882 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1883 } else if (type == NORM_INFINITY) { /* max row norm */ 1884 PetscReal ntemp = 0.0; 1885 for (j=0; j<aij->A->rmap->n; j++) { 1886 v = amat->a + amat->i[j]; 1887 sum = 0.0; 1888 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1889 sum += PetscAbsScalar(*v); v++; 1890 } 1891 v = bmat->a + bmat->i[j]; 1892 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1893 sum += PetscAbsScalar(*v); v++; 1894 } 1895 if (sum > ntemp) ntemp = sum; 1896 } 1897 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1898 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1899 } 1900 PetscFunctionReturn(0); 1901 } 1902 1903 #undef __FUNCT__ 1904 #define __FUNCT__ "MatTranspose_MPIAIJ" 1905 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1906 { 1907 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1908 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1909 PetscErrorCode ierr; 1910 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1911 PetscInt cstart = A->cmap->rstart,ncol; 1912 Mat B; 1913 MatScalar *array; 1914 1915 PetscFunctionBegin; 1916 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1917 1918 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1919 ai = Aloc->i; aj = Aloc->j; 1920 bi = Bloc->i; bj = Bloc->j; 1921 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1922 PetscInt *d_nnz,*g_nnz,*o_nnz; 1923 PetscSFNode *oloc; 1924 PETSC_UNUSED PetscSF sf; 1925 1926 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1927 /* compute d_nnz for preallocation */ 1928 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1929 for (i=0; i<ai[ma]; i++) { 1930 d_nnz[aj[i]]++; 1931 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1932 } 1933 /* compute local off-diagonal contributions */ 1934 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1935 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1936 /* map those to global */ 1937 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1938 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1939 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1940 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1941 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1942 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1943 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1944 1945 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1946 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1947 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1948 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1949 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1950 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1951 } else { 1952 B = *matout; 1953 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1954 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1955 } 1956 1957 /* copy over the A part */ 1958 array = Aloc->a; 1959 row = A->rmap->rstart; 1960 for (i=0; i<ma; i++) { 1961 ncol = ai[i+1]-ai[i]; 1962 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1963 row++; 1964 array += ncol; aj += ncol; 1965 } 1966 aj = Aloc->j; 1967 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1968 1969 /* copy over the B part */ 1970 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1971 array = Bloc->a; 1972 row = A->rmap->rstart; 1973 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1974 cols_tmp = cols; 1975 for (i=0; i<mb; i++) { 1976 ncol = bi[i+1]-bi[i]; 1977 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1978 row++; 1979 array += ncol; cols_tmp += ncol; 1980 } 1981 ierr = PetscFree(cols);CHKERRQ(ierr); 1982 1983 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1984 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1985 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1986 *matout = B; 1987 } else { 1988 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1989 } 1990 PetscFunctionReturn(0); 1991 } 1992 1993 #undef __FUNCT__ 1994 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1995 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1996 { 1997 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1998 Mat a = aij->A,b = aij->B; 1999 PetscErrorCode ierr; 2000 PetscInt s1,s2,s3; 2001 2002 PetscFunctionBegin; 2003 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2004 if (rr) { 2005 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2006 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2007 /* Overlap communication with computation. 
*/ 2008 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2009 } 2010 if (ll) { 2011 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2012 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2013 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2014 } 2015 /* scale the diagonal block */ 2016 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2017 2018 if (rr) { 2019 /* Do a scatter end and then right scale the off-diagonal block */ 2020 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2021 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2022 } 2023 PetscFunctionReturn(0); 2024 } 2025 2026 #undef __FUNCT__ 2027 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2028 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2029 { 2030 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2031 PetscErrorCode ierr; 2032 2033 PetscFunctionBegin; 2034 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2035 PetscFunctionReturn(0); 2036 } 2037 2038 #undef __FUNCT__ 2039 #define __FUNCT__ "MatEqual_MPIAIJ" 2040 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2041 { 2042 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2043 Mat a,b,c,d; 2044 PetscBool flg; 2045 PetscErrorCode ierr; 2046 2047 PetscFunctionBegin; 2048 a = matA->A; b = matA->B; 2049 c = matB->A; d = matB->B; 2050 2051 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2052 if (flg) { 2053 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2054 } 2055 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2056 PetscFunctionReturn(0); 2057 } 2058 2059 #undef __FUNCT__ 2060 #define __FUNCT__ "MatCopy_MPIAIJ" 2061 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2062 { 2063 PetscErrorCode ierr; 2064 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2065 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2066 2067 PetscFunctionBegin; 2068 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2069 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2070 /* because of the column compression in the off-processor part of the matrix a->B, 2071 the number of columns in a->B and b->B may be different, hence we cannot call 2072 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2073 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2074 then copying the submatrices */ 2075 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2076 } else { 2077 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2078 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2079 } 2080 PetscFunctionReturn(0); 2081 } 2082 2083 #undef __FUNCT__ 2084 #define __FUNCT__ "MatSetUp_MPIAIJ" 2085 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2086 { 2087 PetscErrorCode ierr; 2088 2089 PetscFunctionBegin; 2090 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2091 PetscFunctionReturn(0); 2092 } 2093 2094 #undef __FUNCT__ 2095 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2096 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2097 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2098 { 2099 PetscInt i,m=Y->rmap->N; 2100 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2101 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2102 const PetscInt *xi = x->i,*yi = y->i; 2103 2104 PetscFunctionBegin; 2105 /* Set the number of nonzeros in the new matrix */ 2106 for (i=0; i<m; i++) { 2107 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2108 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2109 nnz[i] = 0; 2110 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2111 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2112 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2113 nnz[i]++; 2114 } 2115 for (; k<nzy; k++) nnz[i]++; 2116 } 2117 PetscFunctionReturn(0); 2118 } 2119 2120 #undef __FUNCT__ 2121 #define __FUNCT__ "MatAXPY_MPIAIJ" 2122 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2123 { 2124 PetscErrorCode ierr; 2125 PetscInt i; 2126 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2127 PetscBLASInt bnz,one=1; 2128 Mat_SeqAIJ *x,*y; 2129 2130 PetscFunctionBegin; 2131 if (str == SAME_NONZERO_PATTERN) { 2132 PetscScalar alpha = a; 2133 x = (Mat_SeqAIJ*)xx->A->data; 2134 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2135 y = (Mat_SeqAIJ*)yy->A->data; 2136 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2137 x = (Mat_SeqAIJ*)xx->B->data; 2138 y = (Mat_SeqAIJ*)yy->B->data; 2139 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2140 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2141 } else if (str == SUBSET_NONZERO_PATTERN) { 2142 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2143 2144 x = (Mat_SeqAIJ*)xx->B->data; 2145 y = (Mat_SeqAIJ*)yy->B->data; 2146 if (y->xtoy && y->XtoY != xx->B) { 2147 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2148 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2149 } 2150 if (!y->xtoy) { /* get xtoy */ 2151 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2152 y->XtoY = xx->B; 2153 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2154 } 2155 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2156 } else { 2157 Mat B; 2158 PetscInt *nnz_d,*nnz_o; 2159 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2160 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2161 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2162 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2163 ierr = 
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2164 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2165 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2166 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2167 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2168 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2169 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2170 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2171 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2172 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2173 } 2174 PetscFunctionReturn(0); 2175 } 2176 2177 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2178 2179 #undef __FUNCT__ 2180 #define __FUNCT__ "MatConjugate_MPIAIJ" 2181 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2182 { 2183 #if defined(PETSC_USE_COMPLEX) 2184 PetscErrorCode ierr; 2185 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2186 2187 PetscFunctionBegin; 2188 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2189 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2190 #else 2191 PetscFunctionBegin; 2192 #endif 2193 PetscFunctionReturn(0); 2194 } 2195 2196 #undef __FUNCT__ 2197 #define __FUNCT__ "MatRealPart_MPIAIJ" 2198 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2199 { 2200 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2201 PetscErrorCode ierr; 2202 2203 PetscFunctionBegin; 2204 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2205 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2206 PetscFunctionReturn(0); 2207 } 2208 2209 #undef __FUNCT__ 2210 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2211 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2212 { 2213 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2214 PetscErrorCode ierr; 2215 2216 PetscFunctionBegin; 2217 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2218 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2219 PetscFunctionReturn(0); 2220 } 2221 2222 #if defined(PETSC_HAVE_PBGL) 2223 2224 #include <boost/parallel/mpi/bsp_process_group.hpp> 2225 #include <boost/graph/distributed/ilu_default_graph.hpp> 2226 #include <boost/graph/distributed/ilu_0_block.hpp> 2227 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2228 #include <boost/graph/distributed/petsc/interface.hpp> 2229 #include <boost/multi_array.hpp> 2230 #include <boost/parallel/distributed_property_map.hpp> 2231 2232 #undef __FUNCT__ 2233 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2234 /* 2235 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2236 */ 2237 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2238 { 2239 namespace petsc = boost::distributed::petsc; 2240 2241 namespace graph_dist = boost::graph::distributed; 2242 using boost::graph::distributed::ilu_default::process_group_type; 2243 using boost::graph::ilu_permuted; 2244 2245 PetscBool row_identity, col_identity; 2246 PetscContainer c; 2247 PetscInt m, n, M, N; 2248 PetscErrorCode ierr; 2249 2250 PetscFunctionBegin; 2251 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2252 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2253 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2254 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2255 2256 process_group_type pg; 2257 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2258 lgraph_type *lgraph_p =
new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2259 lgraph_type& level_graph = *lgraph_p; 2260 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2261 2262 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2263 ilu_permuted(level_graph); 2264 2265 /* put together the new matrix */ 2266 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2267 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2268 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2269 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2270 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2271 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2272 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2273 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2274 2275 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr); 2276 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 2277 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 2278 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 2279 PetscFunctionReturn(0); 2280 } 2281 2282 #undef __FUNCT__ 2283 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2284 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2285 { 2286 PetscFunctionBegin; 2287 PetscFunctionReturn(0); 2288 } 2289 2290 #undef __FUNCT__ 2291 #define __FUNCT__ "MatSolve_MPIAIJ" 2292 /* 2293 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2294 */ 2295 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2296 { 2297 namespace graph_dist = boost::graph::distributed; 2298 2299 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2300 lgraph_type *lgraph_p; 2301 PetscContainer c; 2302 PetscErrorCode ierr; 2303 2304 PetscFunctionBegin; 2305 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2306 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2307 ierr = VecCopy(b, x);CHKERRQ(ierr); 2308 2309 PetscScalar *array_x; 2310 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2311 PetscInt sx; 2312 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2313 2314 PetscScalar *array_b; 2315 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2316 PetscInt sb; 2317 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2318 2319 lgraph_type& level_graph = *lgraph_p; 2320 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2321 2322 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2323 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2324 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2325 2326 typedef boost::iterator_property_map<array_ref_type::iterator, 2327 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2328 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2329 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2330 2331 ilu_set_solve(*lgraph_p, vector_b, vector_x); ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); 2332 PetscFunctionReturn(0); 2333 } 2334 #endif 2335 2336 #undef __FUNCT__ 2337 #define __FUNCT__ "MatDestroy_MatRedundant" 2338 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2339 { 2340 PetscErrorCode ierr; 2341 Mat_Redundant *redund; 2342 PetscInt i; 2343 PetscMPIInt size; 2344 2345 PetscFunctionBegin; 2346 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2347 if (size == 1) { 2348 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2349 redund = a->redundant; 2350 } else {
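/* in the parallel case the redundant-matrix support struct hangs off the Mat_MPIAIJ header instead */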
2351 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2352 redund = a->redundant; 2353 } 2354 if (redund){ 2355 if (redund->matseq) { /* via MatGetSubMatrices() */ 2356 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2357 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2358 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2359 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2360 } else { 2361 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2362 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2363 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2364 for (i=0; i<redund->nrecvs; i++) { 2365 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2366 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2367 } 2368 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2369 } 2370 2371 if (redund->psubcomm) { 2372 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2373 } 2374 ierr = redund->Destroy(A);CHKERRQ(ierr); 2375 ierr = PetscFree(redund);CHKERRQ(ierr); 2376 } 2377 PetscFunctionReturn(0); 2378 } 2379 2380 #undef __FUNCT__ 2381 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2382 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2383 { 2384 PetscMPIInt rank,size; 2385 MPI_Comm comm; 2386 PetscErrorCode ierr; 2387 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2388 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2389 PetscInt *rowrange = mat->rmap->range; 2390 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2391 Mat A = aij->A,B=aij->B,C=*matredundant; 2392 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2393 PetscScalar *sbuf_a; 2394 PetscInt nzlocal=a->nz+b->nz; 2395 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2396 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2397 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2398 MatScalar *aworkA,*aworkB; 2399 PetscScalar *vals; 2400 PetscMPIInt tag1,tag2,tag3,imdex; 2401 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2402 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2403 MPI_Status recv_status,*send_status; 2404 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2405 PetscInt **rbuf_j=NULL; 2406 PetscScalar **rbuf_a=NULL; 2407 Mat_Redundant *redund =NULL; 2408 2409 PetscFunctionBegin; 2410 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2411 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2412 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2413 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2414 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2415 2416 if (reuse == MAT_REUSE_MATRIX) { 2417 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2418 if (subsize == 1) { 2419 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2420 redund = c->redundant; 2421 } else { 2422 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2423 redund = c->redundant; 2424 } 2425 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2426 2427 nsends = redund->nsends; 2428 nrecvs = redund->nrecvs; 2429 send_rank = redund->send_rank; 2430 recv_rank = redund->recv_rank; 2431 sbuf_nz = redund->sbuf_nz; 2432 rbuf_nz = redund->rbuf_nz; 2433 sbuf_j = redund->sbuf_j; 2434 sbuf_a = redund->sbuf_a; 2435 rbuf_j = redund->rbuf_j; 2436 rbuf_a = redund->rbuf_a; 2437 } 2438 2439 if (reuse == MAT_INITIAL_MATRIX) { 2440 PetscInt nleftover,np_subcomm; 2441 2442 /* get the destination processors' id send_rank, nsends and nrecvs */ 2443 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2444 2445 np_subcomm = size/nsubcomm; 2446 nleftover = size - nsubcomm*np_subcomm; 2447 2448 /* block of codes below is specific for INTERLACED */ 2449 /* ------------------------------------------------*/ 2450 nsends = 0; nrecvs = 0; 2451 for (i=0; i<size; i++) { 2452 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2453 send_rank[nsends++] = i; 2454 recv_rank[nrecvs++] = i; 2455 } 2456 } 2457 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2458 i = size-nleftover-1; 2459 j = 0; 2460 while (j < nsubcomm - nleftover) { 2461 send_rank[nsends++] = i; 2462 i--; j++; 2463 } 2464 } 2465 2466 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2467 for (i=0; i<nleftover; i++) { 2468 recv_rank[nrecvs++] = size-nleftover+i; 2469 } 2470 } 2471 /*----------------------------------------------*/ 2472 2473 /* allocate sbuf_j, sbuf_a */ 2474 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2475 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2476 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2477 /* 2478 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2479 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2480 */ 2481 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2482 2483 /* copy mat's local entries into the buffers */ 2484 if (reuse == MAT_INITIAL_MATRIX) { 2485 rownz_max = 0; 2486 rptr = sbuf_j; 2487 cols = sbuf_j + rend-rstart + 1; 2488 vals = sbuf_a; 2489 rptr[0] = 0; 2490 for (i=0; i<rend-rstart; i++) { 2491 row = i + rstart; 2492 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2493 ncols = nzA + nzB; 2494 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2495 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2496 /* load the column indices for this row into cols */ 2497 lwrite = 0; 2498 for (l=0; l<nzB; l++) { 2499 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2500 vals[lwrite] = aworkB[l]; 2501 cols[lwrite++] = ctmp; 2502 } 2503 } 2504 for (l=0; l<nzA; l++) { 2505 vals[lwrite] = aworkA[l]; 2506 cols[lwrite++] = cstart + cworkA[l]; 2507 } 2508 for (l=0; l<nzB; l++) { 2509 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2510 vals[lwrite] = aworkB[l]; 2511 cols[lwrite++] = ctmp; 2512 } 2513 } 2514 vals += ncols; 2515 cols += ncols; 2516 rptr[i+1] = rptr[i] + ncols; 2517 if (rownz_max < ncols) rownz_max = ncols; 2518 } 2519 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2520 } else { /* only copy matrix values into sbuf_a */ 2521 rptr = sbuf_j; 2522 vals = sbuf_a; 2523 rptr[0] = 0; 2524 for (i=0; i<rend-rstart; i++) { 2525 row = i + rstart; 2526 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2527 ncols = nzA + nzB; 2528 cworkB = b->j + b->i[i]; 2529 aworkA = a->a + a->i[i]; 2530 aworkB = b->a + b->i[i]; 2531 lwrite = 0; 2532 for (l=0; 
l<nzB; l++) { 2533 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2534 } 2535 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2536 for (l=0; l<nzB; l++) { 2537 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2538 } 2539 vals += ncols; 2540 rptr[i+1] = rptr[i] + ncols; 2541 } 2542 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2543 2544 /* send nzlocal to others, and recv other's nzlocal */ 2545 /*--------------------------------------------------*/ 2546 if (reuse == MAT_INITIAL_MATRIX) { 2547 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2548 2549 s_waits2 = s_waits3 + nsends; 2550 s_waits1 = s_waits2 + nsends; 2551 r_waits1 = s_waits1 + nsends; 2552 r_waits2 = r_waits1 + nrecvs; 2553 r_waits3 = r_waits2 + nrecvs; 2554 } else { 2555 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2556 2557 r_waits3 = s_waits3 + nsends; 2558 } 2559 2560 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2561 if (reuse == MAT_INITIAL_MATRIX) { 2562 /* get new tags to keep the communication clean */ 2563 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2564 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2565 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2566 2567 /* post receives of other's nzlocal */ 2568 for (i=0; i<nrecvs; i++) { 2569 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2570 } 2571 /* send nzlocal to others */ 2572 for (i=0; i<nsends; i++) { 2573 sbuf_nz[i] = nzlocal; 2574 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2575 } 2576 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2577 count = nrecvs; 2578 while (count) { 2579 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2580 2581 recv_rank[imdex] = recv_status.MPI_SOURCE; 2582 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2583 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2584 2585 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2586 2587 rbuf_nz[imdex] += i + 2; 2588 2589 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2590 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2591 count--; 2592 } 2593 /* wait on sends of nzlocal */ 2594 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2595 /* send mat->i,j to others, and recv from other's */ 2596 /*------------------------------------------------*/ 2597 for (i=0; i<nsends; i++) { 2598 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2599 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2600 } 2601 /* wait on receives of mat->i,j */ 2602 /*------------------------------*/ 2603 count = nrecvs; 2604 while (count) { 2605 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2606 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2607 count--; 2608 } 2609 /* wait on sends of mat->i,j */ 2610 /*---------------------------*/ 2611 if (nsends) { 2612 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2613 } 2614 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2615 2616 /* post 
receives, send and receive mat->a */ 2617 /*----------------------------------------*/ 2618 for (imdex=0; imdex<nrecvs; imdex++) { 2619 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2620 } 2621 for (i=0; i<nsends; i++) { 2622 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2623 } 2624 count = nrecvs; 2625 while (count) { 2626 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2627 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2628 count--; 2629 } 2630 if (nsends) { 2631 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2632 } 2633 2634 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2635 2636 /* create redundant matrix */ 2637 /*-------------------------*/ 2638 if (reuse == MAT_INITIAL_MATRIX) { 2639 const PetscInt *range; 2640 PetscInt rstart_sub,rend_sub,mloc_sub; 2641 2642 /* compute rownz_max for preallocation */ 2643 for (imdex=0; imdex<nrecvs; imdex++) { 2644 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2645 rptr = rbuf_j[imdex]; 2646 for (i=0; i<j; i++) { 2647 ncols = rptr[i+1] - rptr[i]; 2648 if (rownz_max < ncols) rownz_max = ncols; 2649 } 2650 } 2651 2652 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2653 2654 /* get local size of redundant matrix 2655 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2656 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2657 rstart_sub = range[nsubcomm*subrank]; 2658 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2659 rend_sub = range[nsubcomm*(subrank+1)]; 2660 } else { 2661 rend_sub = mat->rmap->N; 2662 } 2663 mloc_sub = rend_sub - rstart_sub; 2664 2665 if (M == N) { 2666 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2667 } else { /* non-square matrix */ 2668 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2669 } 2670 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2671 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2672 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2673 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2674 } else { 2675 C = *matredundant; 2676 } 2677 2678 /* insert local matrix entries */ 2679 rptr = sbuf_j; 2680 cols = sbuf_j + rend-rstart + 1; 2681 vals = sbuf_a; 2682 for (i=0; i<rend-rstart; i++) { 2683 row = i + rstart; 2684 ncols = rptr[i+1] - rptr[i]; 2685 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2686 vals += ncols; 2687 cols += ncols; 2688 } 2689 /* insert received matrix entries */ 2690 for (imdex=0; imdex<nrecvs; imdex++) { 2691 rstart = rowrange[recv_rank[imdex]]; 2692 rend = rowrange[recv_rank[imdex]+1]; 2693 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2694 rptr = rbuf_j[imdex]; 2695 cols = rbuf_j[imdex] + rend-rstart + 1; 2696 vals = rbuf_a[imdex]; 2697 for (i=0; i<rend-rstart; i++) { 2698 row = i + rstart; 2699 ncols = rptr[i+1] - rptr[i]; 2700 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2701 vals += ncols; 2702 cols += ncols; 2703 } 2704 } 2705 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2706 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2707 2708 if (reuse == MAT_INITIAL_MATRIX) { 2709 *matredundant = C; 2710 2711 /* create a 
supporting struct and attach it to C for reuse */ 2712 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2713 if (subsize == 1) { 2714 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2715 c->redundant = redund; 2716 } else { 2717 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2718 c->redundant = redund; 2719 } 2720 2721 redund->nzlocal = nzlocal; 2722 redund->nsends = nsends; 2723 redund->nrecvs = nrecvs; 2724 redund->send_rank = send_rank; 2725 redund->recv_rank = recv_rank; 2726 redund->sbuf_nz = sbuf_nz; 2727 redund->rbuf_nz = rbuf_nz; 2728 redund->sbuf_j = sbuf_j; 2729 redund->sbuf_a = sbuf_a; 2730 redund->rbuf_j = rbuf_j; 2731 redund->rbuf_a = rbuf_a; 2732 redund->psubcomm = NULL; 2733 2734 redund->Destroy = C->ops->destroy; 2735 C->ops->destroy = MatDestroy_MatRedundant; 2736 } 2737 PetscFunctionReturn(0); 2738 } 2739 2740 #undef __FUNCT__ 2741 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2742 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2743 { 2744 PetscErrorCode ierr; 2745 MPI_Comm comm; 2746 PetscMPIInt size,subsize; 2747 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2748 Mat_Redundant *redund=NULL; 2749 PetscSubcomm psubcomm=NULL; 2750 MPI_Comm subcomm_in=subcomm; 2751 Mat *matseq; 2752 IS isrow,iscol; 2753 2754 PetscFunctionBegin; 2755 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2756 if (reuse == MAT_INITIAL_MATRIX) { 2757 /* create psubcomm, then get subcomm */ 2758 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2759 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2760 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size); 2761 2762 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2763 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2764 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2765 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2766 subcomm = psubcomm->comm; 2767 } else { /* retrieve psubcomm and subcomm */ 2768 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2769 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2770 if (subsize == 1) { 2771 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2772 redund = c->redundant; 2773 } else { 2774 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2775 redund = c->redundant; 2776 } 2777 psubcomm = redund->psubcomm; 2778 } 2779 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2780 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2781 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */ 2782 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2783 if (subsize == 1) { 2784 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2785 c->redundant->psubcomm = psubcomm; 2786 } else { 2787 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2788 c->redundant->psubcomm = psubcomm; 2789 } 2790 } 2791 PetscFunctionReturn(0); 2792 } 2793 } 2794 2795 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2796 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2797 if (reuse == MAT_INITIAL_MATRIX) { 2798 /* create a local sequential matrix matseq[0] */ 2799 mloc_sub = PETSC_DECIDE; 2800 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2801 ierr =
MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2802 rstart = rend - mloc_sub; 2803 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2804 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2805 } else { /* reuse == MAT_REUSE_MATRIX */ 2806 if (subsize == 1) { 2807 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2808 redund = c->redundant; 2809 } else { 2810 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2811 redund = c->redundant; 2812 } 2813 2814 isrow = redund->isrow; 2815 iscol = redund->iscol; 2816 matseq = redund->matseq; 2817 } 2818 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2819 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2820 2821 if (reuse == MAT_INITIAL_MATRIX) { 2822 /* create a supporting struct and attach it to C for reuse */ 2823 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2824 if (subsize == 1) { 2825 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2826 c->redundant = redund; 2827 } else { 2828 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2829 c->redundant = redund; 2830 } 2831 redund->isrow = isrow; 2832 redund->iscol = iscol; 2833 redund->matseq = matseq; 2834 redund->psubcomm = psubcomm; 2835 redund->Destroy = (*matredundant)->ops->destroy; 2836 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 2837 } 2838 PetscFunctionReturn(0); 2839 } 2840 2841 #undef __FUNCT__ 2842 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2843 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2844 { 2845 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2846 PetscErrorCode ierr; 2847 PetscInt i,*idxb = 0; 2848 PetscScalar *va,*vb; 2849 Vec vtmp; 2850 2851 PetscFunctionBegin; 2852 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2853 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2854 if (idx) { 2855 for (i=0; i<A->rmap->n; i++) { 2856 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2857 } 2858 } 2859 2860 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2861 if (idx) { 2862 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2863 } 2864 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2865 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2866 2867 for (i=0; i<A->rmap->n; i++) { 2868 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2869 va[i] = vb[i]; 2870 if (idx) idx[i] = a->garray[idxb[i]]; 2871 } 2872 } 2873 2874 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2875 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2876 ierr = PetscFree(idxb);CHKERRQ(ierr); 2877 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2878 PetscFunctionReturn(0); 2879 } 2880 2881 #undef __FUNCT__ 2882 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2883 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2884 { 2885 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2886 PetscErrorCode ierr; 2887 PetscInt i,*idxb = 0; 2888 PetscScalar *va,*vb; 2889 Vec vtmp; 2890 2891 PetscFunctionBegin; 2892 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2893 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2894 if (idx) { 2895 for (i=0; i<A->rmap->n; i++) { 2896 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2897 } 2898 } 2899 2900 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2901 if (idx) { 2902 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2903 } 2904 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2905 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2906 2907 for (i=0; i<A->rmap->n; i++)
{
2908     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2909       va[i] = vb[i];
2910       if (idx) idx[i] = a->garray[idxb[i]];
2911     }
2912   }
2913
2914   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2915   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2916   ierr = PetscFree(idxb);CHKERRQ(ierr);
2917   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2918   PetscFunctionReturn(0);
2919 }
2920
2921 #undef __FUNCT__
2922 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2923 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2924 {
2925   Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
2926   PetscInt       n       = A->rmap->n;
2927   PetscInt       cstart  = A->cmap->rstart;
2928   PetscInt       *cmap   = mat->garray;
2929   PetscInt       *diagIdx, *offdiagIdx;
2930   Vec            diagV, offdiagV;
2931   PetscScalar    *a, *diagA, *offdiagA;
2932   PetscInt       r;
2933   PetscErrorCode ierr;
2934
2935   PetscFunctionBegin;
2936   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2937   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2938   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2939   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2940   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2941   ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2942   ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2943   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2944   for (r = 0; r < n; ++r) {
2945     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2946       a[r]   = diagA[r];
2947       idx[r] = cstart + diagIdx[r];
2948     } else {
2949       a[r]   = offdiagA[r];
2950       idx[r] = cmap[offdiagIdx[r]];
2951     }
2952   }
2953   ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2954   ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2955   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2956   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2957   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2958   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2959   PetscFunctionReturn(0);
2960 }
2961
2962 #undef __FUNCT__
2963 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2964 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2965 {
2966   Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
2967   PetscInt       n       = A->rmap->n;
2968   PetscInt       cstart  = A->cmap->rstart;
2969   PetscInt       *cmap   = mat->garray;
2970   PetscInt       *diagIdx, *offdiagIdx;
2971   Vec            diagV, offdiagV;
2972   PetscScalar    *a, *diagA, *offdiagA;
2973   PetscInt       r;
2974   PetscErrorCode ierr;
2975
2976   PetscFunctionBegin;
2977   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2978   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2979   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2980   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2981   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2982   ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2983   ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2984   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2985   for (r = 0; r < n; ++r) {
2986     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2987       a[r]   = diagA[r];
2988       idx[r] = cstart + diagIdx[r];
2989     } else {
2990       a[r]   = offdiagA[r];
2991       idx[r] = cmap[offdiagIdx[r]];
2992     }
2993   }
2994   ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2995   ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2996   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2997   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2998   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2999   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3000
PetscFunctionReturn(0); 3001 } 3002 3003 #undef __FUNCT__ 3004 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3005 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3006 { 3007 PetscErrorCode ierr; 3008 Mat *dummy; 3009 3010 PetscFunctionBegin; 3011 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3012 *newmat = *dummy; 3013 ierr = PetscFree(dummy);CHKERRQ(ierr); 3014 PetscFunctionReturn(0); 3015 } 3016 3017 #undef __FUNCT__ 3018 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3019 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3020 { 3021 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3022 PetscErrorCode ierr; 3023 3024 PetscFunctionBegin; 3025 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3026 PetscFunctionReturn(0); 3027 } 3028 3029 #undef __FUNCT__ 3030 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3031 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3032 { 3033 PetscErrorCode ierr; 3034 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3035 3036 PetscFunctionBegin; 3037 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3038 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3039 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3040 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3041 PetscFunctionReturn(0); 3042 } 3043 3044 /* -------------------------------------------------------------------*/ 3045 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3046 MatGetRow_MPIAIJ, 3047 MatRestoreRow_MPIAIJ, 3048 MatMult_MPIAIJ, 3049 /* 4*/ MatMultAdd_MPIAIJ, 3050 MatMultTranspose_MPIAIJ, 3051 MatMultTransposeAdd_MPIAIJ, 3052 #if defined(PETSC_HAVE_PBGL) 3053 MatSolve_MPIAIJ, 3054 #else 3055 0, 3056 #endif 3057 0, 3058 0, 3059 /*10*/ 0, 3060 0, 3061 0, 3062 MatSOR_MPIAIJ, 3063 MatTranspose_MPIAIJ, 3064 /*15*/ MatGetInfo_MPIAIJ, 3065 MatEqual_MPIAIJ, 3066 MatGetDiagonal_MPIAIJ, 3067 MatDiagonalScale_MPIAIJ, 3068 MatNorm_MPIAIJ, 3069 /*20*/ MatAssemblyBegin_MPIAIJ, 3070 MatAssemblyEnd_MPIAIJ, 3071 MatSetOption_MPIAIJ, 3072 MatZeroEntries_MPIAIJ, 3073 /*24*/ MatZeroRows_MPIAIJ, 3074 0, 3075 #if defined(PETSC_HAVE_PBGL) 3076 0, 3077 #else 3078 0, 3079 #endif 3080 0, 3081 0, 3082 /*29*/ MatSetUp_MPIAIJ, 3083 #if defined(PETSC_HAVE_PBGL) 3084 0, 3085 #else 3086 0, 3087 #endif 3088 0, 3089 0, 3090 0, 3091 /*34*/ MatDuplicate_MPIAIJ, 3092 0, 3093 0, 3094 0, 3095 0, 3096 /*39*/ MatAXPY_MPIAIJ, 3097 MatGetSubMatrices_MPIAIJ, 3098 MatIncreaseOverlap_MPIAIJ, 3099 MatGetValues_MPIAIJ, 3100 MatCopy_MPIAIJ, 3101 /*44*/ MatGetRowMax_MPIAIJ, 3102 MatScale_MPIAIJ, 3103 0, 3104 0, 3105 MatZeroRowsColumns_MPIAIJ, 3106 /*49*/ MatSetRandom_MPIAIJ, 3107 0, 3108 0, 3109 0, 3110 0, 3111 /*54*/ MatFDColoringCreate_MPIXAIJ, 3112 0, 3113 MatSetUnfactored_MPIAIJ, 3114 MatPermute_MPIAIJ, 3115 0, 3116 /*59*/ MatGetSubMatrix_MPIAIJ, 3117 MatDestroy_MPIAIJ, 3118 MatView_MPIAIJ, 3119 0, 3120 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3121 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3122 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3123 0, 3124 0, 3125 0, 3126 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3127 MatGetRowMinAbs_MPIAIJ, 3128 0, 3129 MatSetColoring_MPIAIJ, 3130 0, 3131 MatSetValuesAdifor_MPIAIJ, 3132 /*75*/ MatFDColoringApply_AIJ, 3133 0, 3134 0, 3135 0, 3136 MatFindZeroDiagonals_MPIAIJ, 3137 /*80*/ 0, 3138 0, 3139 0, 3140 /*83*/ MatLoad_MPIAIJ, 3141 0, 3142 0, 3143 0, 3144 0, 3145 0, 3146 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3147 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3148 
MatMatMultNumeric_MPIAIJ_MPIAIJ, 3149 MatPtAP_MPIAIJ_MPIAIJ, 3150 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3151 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3152 0, 3153 0, 3154 0, 3155 0, 3156 /*99*/ 0, 3157 0, 3158 0, 3159 MatConjugate_MPIAIJ, 3160 0, 3161 /*104*/MatSetValuesRow_MPIAIJ, 3162 MatRealPart_MPIAIJ, 3163 MatImaginaryPart_MPIAIJ, 3164 0, 3165 0, 3166 /*109*/0, 3167 MatGetRedundantMatrix_MPIAIJ, 3168 MatGetRowMin_MPIAIJ, 3169 0, 3170 0, 3171 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3172 0, 3173 0, 3174 0, 3175 0, 3176 /*119*/0, 3177 0, 3178 0, 3179 0, 3180 MatGetMultiProcBlock_MPIAIJ, 3181 /*124*/MatFindNonzeroRows_MPIAIJ, 3182 MatGetColumnNorms_MPIAIJ, 3183 MatInvertBlockDiagonal_MPIAIJ, 3184 0, 3185 MatGetSubMatricesParallel_MPIAIJ, 3186 /*129*/0, 3187 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3188 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3189 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3190 0, 3191 /*134*/0, 3192 0, 3193 0, 3194 0, 3195 0, 3196 /*139*/0, 3197 0, 3198 0, 3199 MatFDColoringSetUp_MPIXAIJ 3200 }; 3201 3202 /* ----------------------------------------------------------------------------------------*/ 3203 3204 #undef __FUNCT__ 3205 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3206 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3207 { 3208 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3209 PetscErrorCode ierr; 3210 3211 PetscFunctionBegin; 3212 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3213 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3214 PetscFunctionReturn(0); 3215 } 3216 3217 #undef __FUNCT__ 3218 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3219 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3220 { 3221 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3222 PetscErrorCode ierr; 3223 3224 PetscFunctionBegin; 3225 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3226 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3227 PetscFunctionReturn(0); 3228 } 3229 3230 #undef __FUNCT__ 3231 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3232 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3233 { 3234 Mat_MPIAIJ *b; 3235 PetscErrorCode ierr; 3236 3237 PetscFunctionBegin; 3238 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3239 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3240 b = (Mat_MPIAIJ*)B->data; 3241 3242 if (!B->preallocated) { 3243 /* Explicitly create 2 MATSEQAIJ matrices. 
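       b->A will hold the local "diagonal" block (B->rmap->n by B->cmap->n) and
       b->B the off-diagonal block.  b->B is created below with the full global
       column count B->cmap->N; during assembly the columns of b->B that actually
       contain nonzeros are compacted and their global column numbers are
       recorded in garray (see MatSetUpMultiply_MPIAIJ()).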
*/ 3244 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3245 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3246 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3247 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3248 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3249 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3250 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3251 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3252 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3253 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3254 } 3255 3256 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3257 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3258 B->preallocated = PETSC_TRUE; 3259 PetscFunctionReturn(0); 3260 } 3261 3262 #undef __FUNCT__ 3263 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3264 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3265 { 3266 Mat mat; 3267 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3268 PetscErrorCode ierr; 3269 3270 PetscFunctionBegin; 3271 *newmat = 0; 3272 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3273 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3274 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3275 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3276 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3277 a = (Mat_MPIAIJ*)mat->data; 3278 3279 mat->factortype = matin->factortype; 3280 mat->assembled = PETSC_TRUE; 3281 mat->insertmode = NOT_SET_VALUES; 3282 mat->preallocated = PETSC_TRUE; 3283 3284 a->size = oldmat->size; 3285 a->rank = oldmat->rank; 3286 a->donotstash = oldmat->donotstash; 3287 a->roworiented = oldmat->roworiented; 3288 a->rowindices = 0; 3289 a->rowvalues = 0; 3290 a->getrowactive = PETSC_FALSE; 3291 3292 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3293 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3294 3295 if (oldmat->colmap) { 3296 #if defined(PETSC_USE_CTABLE) 3297 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3298 #else 3299 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3300 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3301 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3302 #endif 3303 } else a->colmap = 0; 3304 if (oldmat->garray) { 3305 PetscInt len; 3306 len = oldmat->B->cmap->n; 3307 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3308 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3309 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3310 } else a->garray = 0; 3311 3312 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3313 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3314 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3315 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3316 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3317 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3318 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3319 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3320   ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3321   *newmat = mat;
3322   PetscFunctionReturn(0);
3323 }
3324
3325
3326
3327 #undef __FUNCT__
3328 #define __FUNCT__ "MatLoad_MPIAIJ"
3329 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3330 {
3331   PetscScalar    *vals,*svals;
3332   MPI_Comm       comm;
3333   PetscErrorCode ierr;
3334   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3335   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3336   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3337   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3338   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3339   int            fd;
3340   PetscInt       bs = 1;
3341
3342   PetscFunctionBegin;
3343   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3344   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3345   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3346   if (!rank) {
3347     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3348     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3349     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3350   }
3351
3352   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3353   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3354   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3355
3356   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3357
3358   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3359   M = header[1]; N = header[2];
3360   /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
3361   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3362   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3363
3364   /* If global sizes are set, check that they are consistent with those given in the file */
3365   if (sizesset) {
3366     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3367   }
3368   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3369   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3370
3371   /* determine ownership of all (block) rows */
3372   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3373   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3374   else m = newMat->rmap->n; /* Set by user */
3375
3376   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3377   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3378
3379   /* First process needs enough room for process with most rows */
3380   if (!rank) {
3381     mmax = rowners[1];
3382     for (i=2; i<=size; i++) {
3383       mmax = PetscMax(mmax, rowners[i]);
3384     }
3385   } else mmax = -1; /* unused, but compilers complain */
3386
3387   rowners[0] = 0;
3388   for (i=2; i<=size; i++) {
3389     rowners[i] += rowners[i-1];
3390   }
3391   rstart = rowners[rank];
3392   rend   = rowners[rank+1];
3393
3394   /* distribute row lengths to all processors */
3395   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3396   if (!rank) {
3397     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3398     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3399     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3400     for (j=0; j<m; j++) {
3401       procsnz[0] += ourlens[j];
3402     }
3403     for (i=1; i<size; i++) {
3404       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3405       /* calculate the number of nonzeros on each processor */
3406       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3407         procsnz[i] += rowlengths[j];
3408       }
3409       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3410     }
3411     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3412   } else {
3413     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3414   }
3415
3416   if (!rank) {
3417     /* determine max buffer needed and allocate it */
3418     maxnz = 0;
3419     for (i=0; i<size; i++) {
3420       maxnz = PetscMax(maxnz,procsnz[i]);
3421     }
3422     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3423
3424     /* read in my part of the matrix column indices */
3425     nz   = procsnz[0];
3426     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3427     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3428
3429     /* read in everyone else's part and ship it off */
3430     for (i=1; i<size; i++) {
3431       nz   = procsnz[i];
3432       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3433       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3434     }
3435     ierr = PetscFree(cols);CHKERRQ(ierr);
3436   } else {
3437     /* determine buffer space needed for message */
3438     nz = 0;
3439     for (i=0; i<m; i++) {
3440       nz += ourlens[i];
3441     }
3442     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3443
3444     /* receive message of column indices */
3445     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3446   }
3447
3448   /* determine column ownership if matrix is not square */
3449   if (N != M) {
3450     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3451     else n = newMat->cmap->n;
3452     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3453     cstart = cend - n;
3454   } else {
3455     cstart = rstart;
3456     cend   = rend;
3457     n      = cend - cstart;
3458   }
3459
3460   /* loop over local rows, determining number of off diagonal entries */
3461   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3462   jj   = 0;
3463   for (i=0; i<m; i++) {
3464     for (j=0; j<ourlens[i]; j++) {
3465       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3466       jj++;
3467     }
3468   }
3469
3470   for (i=0; i<m; i++) {
3471     ourlens[i] -= offlens[i];
3472   }
3473   if (!sizesset) {
3474     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3475   }
3476
3477   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3478
3479   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3480
3481   for (i=0; i<m; i++) {
3482     ourlens[i] += offlens[i];
3483   }
3484
3485   if (!rank) {
3486     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3487
3488     /* read in my part of the matrix numerical values */
3489     nz   = procsnz[0];
3490     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3491
3492     /* insert into matrix */
3493     jj      = rstart;
3494     smycols = mycols;
3495     svals   = vals;
3496     for (i=0; i<m; i++) {
3497       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3498       smycols += ourlens[i];
3499       svals   += ourlens[i];
3500       jj++;
3501     }
3502
3503     /* read in other processors' parts and ship them out */
3504     for (i=1; i<size; i++) {
3505       nz   = procsnz[i];
3506       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3507       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3508     }
3509     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3510   } else {
3511     /* receive numeric values */
3512     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3513
3514     /* receive message of values */
3515     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3516
3517     /* insert into matrix */
3518     jj      = rstart;
3519     smycols = mycols;
3520     svals   = vals;
3521     for (i=0; i<m; i++) {
3522       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3523       smycols += ourlens[i];
3524       svals   += ourlens[i];
3525       jj++;
3526     }
3527   }
3528   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3529   ierr = PetscFree(vals);CHKERRQ(ierr);
3530   ierr = PetscFree(mycols);CHKERRQ(ierr);
3531   ierr = PetscFree(rowners);CHKERRQ(ierr);
3532   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3533   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3534   PetscFunctionReturn(0);
3535 }
3536
3537 #undef __FUNCT__
3538 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3539 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3540 {
3541   PetscErrorCode ierr;
3542   IS             iscol_local;
3543   PetscInt       csize;
3544
3545   PetscFunctionBegin;
3546   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3547   if (call == MAT_REUSE_MATRIX) {
3548     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3549     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3550   } else {
3551     PetscInt cbs;
3552     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3553     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3554     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3555   }
3556   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3557   if (call == MAT_INITIAL_MATRIX) {
3558     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3559     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3560   }
3561   PetscFunctionReturn(0);
3562 }
3563
3564 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3565 #undef __FUNCT__
3566 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3567 /*
3568     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3569   process, and then the final parallel matrix formed by concatenating the local pieces.
3570   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3571
3572     Note: This requires a sequential iscol containing all of the column indices.
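    (The caller, MatGetSubMatrix_MPIAIJ() above, builds such an iscol with
    ISAllGather() and passes it in as iscol_local.)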
3573 */ 3574 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3575 { 3576 PetscErrorCode ierr; 3577 PetscMPIInt rank,size; 3578 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3579 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3580 PetscBool allcolumns, colflag; 3581 Mat M,Mreuse; 3582 MatScalar *vwork,*aa; 3583 MPI_Comm comm; 3584 Mat_SeqAIJ *aij; 3585 3586 PetscFunctionBegin; 3587 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3588 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3589 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3590 3591 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3592 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3593 if (colflag && ncol == mat->cmap->N) { 3594 allcolumns = PETSC_TRUE; 3595 } else { 3596 allcolumns = PETSC_FALSE; 3597 } 3598 if (call == MAT_REUSE_MATRIX) { 3599 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3600 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3601 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3602 } else { 3603 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3604 } 3605 3606 /* 3607 m - number of local rows 3608 n - number of columns (same on all processors) 3609 rstart - first row in new global matrix generated 3610 */ 3611 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3612 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3613 if (call == MAT_INITIAL_MATRIX) { 3614 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3615 ii = aij->i; 3616 jj = aij->j; 3617 3618 /* 3619 Determine the number of non-zeros in the diagonal and off-diagonal 3620 portions of the matrix in order to do correct preallocation 3621 */ 3622 3623 /* first get start and end of "diagonal" columns */ 3624 if (csize == PETSC_DECIDE) { 3625 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3626 if (mglobal == n) { /* square matrix */ 3627 nlocal = m; 3628 } else { 3629 nlocal = n/size + ((n % size) > rank); 3630 } 3631 } else { 3632 nlocal = csize; 3633 } 3634 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3635 rstart = rend - nlocal; 3636 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3637 3638 /* next, compute all the lengths */ 3639 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3640 olens = dlens + m; 3641 for (i=0; i<m; i++) { 3642 jend = ii[i+1] - ii[i]; 3643 olen = 0; 3644 dlen = 0; 3645 for (j=0; j<jend; j++) { 3646 if (*jj < rstart || *jj >= rend) olen++; 3647 else dlen++; 3648 jj++; 3649 } 3650 olens[i] = olen; 3651 dlens[i] = dlen; 3652 } 3653 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3654 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3655 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3656 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3657 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3658 ierr = PetscFree(dlens);CHKERRQ(ierr); 3659 } else { 3660 PetscInt ml,nl; 3661 3662 M = *newmat; 3663 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3664 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3665 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3666 /* 3667 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3668     rather than the slower MatSetValues().
3669     */
3670     M->was_assembled = PETSC_TRUE;
3671     M->assembled     = PETSC_FALSE;
3672   }
3673   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3674   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3675   ii   = aij->i;
3676   jj   = aij->j;
3677   aa   = aij->a;
3678   for (i=0; i<m; i++) {
3679     row   = rstart + i;
3680     nz    = ii[i+1] - ii[i];
3681     cwork = jj; jj += nz;
3682     vwork = aa; aa += nz;
3683     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3684   }
3685
3686   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3687   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3688   *newmat = M;
3689
3690   /* save submatrix used in processor for next request */
3691   if (call == MAT_INITIAL_MATRIX) {
3692     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3693     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3694   }
3695   PetscFunctionReturn(0);
3696 }
3697
3698 #undef __FUNCT__
3699 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3700 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3701 {
3702   PetscInt       m,cstart, cend,j,nnz,i,d;
3703   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3704   const PetscInt *JJ;
3705   PetscScalar    *values;
3706   PetscErrorCode ierr;
3707
3708   PetscFunctionBegin;
3709   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3710
3711   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3712   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3713   m      = B->rmap->n;
3714   cstart = B->cmap->rstart;
3715   cend   = B->cmap->rend;
3716   rstart = B->rmap->rstart;
3717
3718   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3719
3720 #if defined(PETSC_USE_DEBUG)
3721   for (i=0; i<m; i++) {
3722     nnz = Ii[i+1] - Ii[i];
3723     JJ  = J + Ii[i];
3724     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3725     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3726     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3727   }
3728 #endif
3729
3730   for (i=0; i<m; i++) {
3731     nnz     = Ii[i+1] - Ii[i];
3732     JJ      = J + Ii[i];
3733     nnz_max = PetscMax(nnz_max,nnz);
3734     d       = 0;
3735     for (j=0; j<nnz; j++) {
3736       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3737     }
3738     d_nnz[i] = d;
3739     o_nnz[i] = nnz - d;
3740   }
3741   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3742   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3743
3744   if (v) values = (PetscScalar*)v;
3745   else {
3746     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3747   }
3748
3749   for (i=0; i<m; i++) {
3750     ii   = i + rstart;
3751     nnz  = Ii[i+1] - Ii[i];
3752     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3753   }
3754   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3755   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3756
3757   if (!v) {
3758     ierr = PetscFree(values);CHKERRQ(ierr);
3759   }
3760   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3761   PetscFunctionReturn(0);
3762 }
3763
3764 #undef __FUNCT__
3765 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3766 /*@
3767    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3768    (the default parallel PETSc format).
3769
3770    Collective on MPI_Comm
3771
3772    Input Parameters:
3773 +  B - the matrix
3774 .  i - the indices into j for the start of each local row (starts with zero)
3775 .  j - the column indices for each local row (starts with zero)
3776 -  v - optional values in the matrix
3777
3778    Level: developer
3779
3780    Notes:
3781        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3782      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3783      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3784
3785        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3786
3787        The format used for the sparse matrix input is equivalent to a
3788     row-major ordering, i.e., for the following matrix, the input data expected is
3789     as shown:
3790
3791         1 0 0
3792         2 0 3     P0
3793        -------
3794         4 5 6     P1
3795
3796      Process0 [P0]: rows_owned=[0,1]
3797         i =  {0,1,3}  [size = nrow+1 = 2+1]
3798         j =  {0,0,2}  [size = nz = 3]
3799         v =  {1,2,3}  [size = nz = 3]
3800
3801      Process1 [P1]: rows_owned=[2]
3802         i =  {0,3}    [size = nrow+1 = 1+1]
3803         j =  {0,1,2}  [size = nz = 3]
3804         v =  {4,5,6}  [size = nz = 3]
3805
3806 .keywords: matrix, aij, compressed row, sparse, parallel
3807
3808 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3809           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3810 @*/
3811 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3812 {
3813   PetscErrorCode ierr;
3814
3815   PetscFunctionBegin;
3816   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3817   PetscFunctionReturn(0);
3818 }
3819
3820 #undef __FUNCT__
3821 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3822 /*@C
3823    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3824    (the default parallel PETSc format).  For good matrix assembly performance
3825    the user should preallocate the matrix storage by setting the parameters
3826    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3827    performance can be increased by more than a factor of 50.
3828
3829    Collective on MPI_Comm
3830
3831    Input Parameters:
3832 +  A - the matrix
3833 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3834            (same value is used for all local rows)
3835 .  d_nnz - array containing the number of nonzeros in the various rows of the
3836            DIAGONAL portion of the local submatrix (possibly different for each row)
3837            or NULL, if d_nz is used to specify the nonzero structure.
3838            The size of this array is equal to the number of local rows, i.e., 'm'.
3839            For matrices that will be factored, you must leave room for (and set)
3840            the diagonal entry even if it is zero.
3841 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3842            submatrix (same value is used for all local rows).
3843 -  o_nnz - array containing the number of nonzeros in the various rows of the
3844            OFF-DIAGONAL portion of the local submatrix (possibly different for
3845            each row) or NULL, if o_nz is used to specify the nonzero
3846            structure. The size of this array is equal to the number
3847            of local rows, i.e., 'm'.
3848
3849    If the *_nnz parameter is given then the *_nz parameter is ignored.
3850
3851    The AIJ format (also called the Yale sparse matrix format or
3852    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3853    storage.  The stored row and column indices begin with zero.
3854    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3855
3856    The parallel matrix is partitioned such that the first m0 rows belong to
3857    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3858    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3859
3860    The DIAGONAL portion of the local submatrix of a processor can be defined
3861    as the submatrix which is obtained by extracting the part corresponding to
3862    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3863    first row that belongs to the processor, r2 is the last row belonging to
3864    this processor, and c1-c2 is the range of indices of the local part of a
3865    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3866    common case of a square matrix, the row and column ranges are the same and
3867    the DIAGONAL part is also square.  The remaining portion of the local
3868    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3869
3870    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3871
3872    You can call MatGetInfo() to get information on how effective the preallocation was;
3873    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3874    You can also run with the option -info and look for messages with the string
3875    malloc in them to see if additional memory allocation was needed.
3876
3877    Example usage:
3878
3879    Consider the following 8x8 matrix with 34 non-zero values, that is
3880    assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
3881    proc1 owns 3 rows, proc2 owns 2 rows.  This division can be shown
3882    as follows:
3883
3884 .vb
3885             1  2  0  |  0  3  0  |  0  4
3886     Proc0   0  5  6  |  7  0  0  |  8  0
3887             9  0 10  | 11  0  0  | 12  0
3888     -------------------------------------
3889            13  0 14  | 15 16 17  |  0  0
3890     Proc1   0 18  0  | 19 20 21  |  0  0
3891             0  0  0  | 22 23  0  | 24  0
3892     -------------------------------------
3893     Proc2  25 26 27  |  0  0 28  | 29  0
3894            30  0  0  | 31 32 33  |  0 34
3895 .ve
3896
3897    This can be represented as a collection of submatrices as:
3898
3899 .vb
3900       A B C
3901       D E F
3902       G H I
3903 .ve
3904
3905    Where the submatrices A,B,C are owned by proc0, D,E,F are
3906    owned by proc1, G,H,I are owned by proc2.
3907
3908    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3909    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3910    The 'M','N' parameters are 8,8, and have the same values on all procs.
3911
3912    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3913    submatrices [A], [E], [I] respectively.
   The OFF-DIAGONAL submatrices
3914    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3915    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3916    part as SeqAIJ matrices.  For example, proc1 will store [E] as a SeqAIJ
3917    matrix, and [DF] as another SeqAIJ matrix.
3918
3919    When d_nz, o_nz parameters are specified, d_nz storage elements are
3920    allocated for every row of the local DIAGONAL submatrix, and o_nz
3921    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3922    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3923    the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3924    In this case, the values of d_nz,o_nz are:
3925 .vb
3926      proc0 : d_nz = 2, o_nz = 2
3927      proc1 : d_nz = 3, o_nz = 2
3928      proc2 : d_nz = 1, o_nz = 4
3929 .ve
3930    We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
3931    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3932    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3933    34 values.
3934
3935    When d_nnz, o_nnz parameters are specified, the storage is specified
3936    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3937    In the above case the values for d_nnz,o_nnz are:
3938 .vb
3939      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3940      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3941      proc2: d_nnz = [1,1] and o_nnz = [4,4]
3942 .ve
3943    Here the space allocated is the sum of all the above values, i.e., 34, and
3944    hence the preallocation is perfect.
3945
3946    Level: intermediate
3947
3948 .keywords: matrix, aij, compressed row, sparse, parallel
3949
3950 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3951           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3952 @*/
3953 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3954 {
3955   PetscErrorCode ierr;
3956
3957   PetscFunctionBegin;
3958   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3959   PetscValidType(B,1);
3960   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3961   PetscFunctionReturn(0);
3962 }
3963
3964 #undef __FUNCT__
3965 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3966 /*@
3967    MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
3968    the local rows in standard CSR format.
3969
3970    Collective on MPI_Comm
3971
3972    Input Parameters:
3973 +  comm - MPI communicator
3974 .  m - number of local rows (Cannot be PETSC_DECIDE)
3975 .  n - This value should be the same as the local size used in creating the
3976        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3977        calculated if N is given) For square matrices n is almost always m.
3978 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3979 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3980 .  i - row indices
3981 .  j - column indices
3982 -  a - matrix values
3983
3984    Output Parameter:
3985 .  mat - the matrix
3986
3987    Level: intermediate
3988
3989    Notes:
3990        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3991      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3992      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
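
       A minimal call sketch (assuming m, n and the CSR arrays i, j, a describe
     this process's rows, as in the layout below):
.vb
       Mat A;
       ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve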
3993
3994        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3995
3996        The format used for the sparse matrix input is equivalent to a
3997     row-major ordering, i.e., for the following matrix, the input data expected is
3998     as shown:
3999
4000         1 0 0
4001         2 0 3     P0
4002        -------
4003         4 5 6     P1
4004
4005      Process0 [P0]: rows_owned=[0,1]
4006         i =  {0,1,3}  [size = nrow+1 = 2+1]
4007         j =  {0,0,2}  [size = nz = 3]
4008         v =  {1,2,3}  [size = nz = 3]
4009
4010      Process1 [P1]: rows_owned=[2]
4011         i =  {0,3}    [size = nrow+1 = 1+1]
4012         j =  {0,1,2}  [size = nz = 3]
4013         v =  {4,5,6}  [size = nz = 3]
4014
4015 .keywords: matrix, aij, compressed row, sparse, parallel
4016
4017 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4018           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4019 @*/
4020 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4021 {
4022   PetscErrorCode ierr;
4023
4024   PetscFunctionBegin;
4025   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4026   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4027   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4028   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4029   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4030   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4031   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4032   PetscFunctionReturn(0);
4033 }
4034
4035 #undef __FUNCT__
4036 #define __FUNCT__ "MatCreateAIJ"
4037 /*@C
4038    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4039    (the default parallel PETSc format).  For good matrix assembly performance
4040    the user should preallocate the matrix storage by setting the parameters
4041    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4042    performance can be increased by more than a factor of 50.
4043
4044    Collective on MPI_Comm
4045
4046    Input Parameters:
4047 +  comm - MPI communicator
4048 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4049        This value should be the same as the local size used in creating the
4050        y vector for the matrix-vector product y = Ax.
4051 .  n - This value should be the same as the local size used in creating the
4052        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4053        calculated if N is given) For square matrices n is almost always m.
4054 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4055 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4056 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4057            (same value is used for all local rows)
4058 .  d_nnz - array containing the number of nonzeros in the various rows of the
4059            DIAGONAL portion of the local submatrix (possibly different for each row)
4060            or NULL, if d_nz is used to specify the nonzero structure.
4061            The size of this array is equal to the number of local rows, i.e., 'm'.
4062 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4063            submatrix (same value is used for all local rows).
4064 -  o_nnz - array containing the number of nonzeros in the various rows of the
4065            OFF-DIAGONAL portion of the local submatrix (possibly different for
4066            each row) or NULL, if o_nz is used to specify the nonzero
4067            structure. The size of this array is equal to the number
4068            of local rows, i.e., 'm'.
4069
4070    Output Parameter:
4071 .  A - the matrix
4072
4073    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4074    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4075    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4076
4077    Notes:
4078    If the *_nnz parameter is given then the *_nz parameter is ignored.
4079
4080    The m,n,M,N parameters specify the size of the matrix, and its partitioning across
4081    processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4082    storage requirements for this matrix.
4083
4084    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4085    processor then it must be used on all processors that share the object for
4086    that argument.
4087
4088    The user MUST specify either the local or global matrix dimensions
4089    (possibly both).
4090
4091    The parallel matrix is partitioned across processors such that the
4092    first m0 rows belong to process 0, the next m1 rows belong to
4093    process 1, the next m2 rows belong to process 2, etc., where
4094    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4095    values corresponding to an [m x N] submatrix.
4096
4097    The columns are logically partitioned with the n0 columns belonging
4098    to the 0th partition, the next n1 columns belonging to the next
4099    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4100
4101    The DIAGONAL portion of the local submatrix on any given processor
4102    is the submatrix corresponding to the rows and columns m,n
4103    corresponding to the given processor, i.e., the diagonal matrix on
4104    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4105    etc.  The remaining portion of the local submatrix [m x (N-n)]
4106    constitutes the OFF-DIAGONAL portion.  The example below better
4107    illustrates this concept.
4108
4109    For a square global matrix we define each processor's diagonal portion
4110    to be its local rows and the corresponding columns (a square submatrix);
4111    each processor's off-diagonal portion encompasses the remainder of the
4112    local matrix (a rectangular submatrix).
4113
4114    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4115
4116    When calling this routine with a single process communicator, a matrix of
4117    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4118    type of communicator, use the construction mechanism:
4119      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4120
4121    By default, this format uses inodes (identical nodes) when possible.
4122    We search for consecutive rows with the same nonzero structure, thereby
4123    reusing matrix information to achieve increased efficiency.
4124
4125    Options Database Keys:
4126 +  -mat_no_inode  - Do not use inodes
4127 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4128 -  -mat_aij_oneindex - Internally use indexing starting at 1
4129         rather than 0.  Note that when calling MatSetValues(),
4130         the user still MUST index entries starting at 0!
4131
4132
4133    Example usage:
4134
4135    Consider the following 8x8 matrix with 34 non-zero values, that is
4136    assembled across 3 processors.
   Let's assume that proc0 owns 3 rows,
4137    proc1 owns 3 rows, proc2 owns 2 rows.  This division can be shown
4138    as follows:
4139
4140 .vb
4141             1  2  0  |  0  3  0  |  0  4
4142     Proc0   0  5  6  |  7  0  0  |  8  0
4143             9  0 10  | 11  0  0  | 12  0
4144     -------------------------------------
4145            13  0 14  | 15 16 17  |  0  0
4146     Proc1   0 18  0  | 19 20 21  |  0  0
4147             0  0  0  | 22 23  0  | 24  0
4148     -------------------------------------
4149     Proc2  25 26 27  |  0  0 28  | 29  0
4150            30  0  0  | 31 32 33  |  0 34
4151 .ve
4152
4153    This can be represented as a collection of submatrices as:
4154
4155 .vb
4156       A B C
4157       D E F
4158       G H I
4159 .ve
4160
4161    Where the submatrices A,B,C are owned by proc0, D,E,F are
4162    owned by proc1, G,H,I are owned by proc2.
4163
4164    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4165    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4166    The 'M','N' parameters are 8,8, and have the same values on all procs.
4167
4168    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4169    submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
4170    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4171    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4172    part as SeqAIJ matrices.  For example, proc1 will store [E] as a SeqAIJ
4173    matrix, and [DF] as another SeqAIJ matrix.
4174
4175    When d_nz, o_nz parameters are specified, d_nz storage elements are
4176    allocated for every row of the local diagonal submatrix, and o_nz
4177    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4178    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4179    the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4180    In this case, the values of d_nz,o_nz are:
4181 .vb
4182      proc0 : d_nz = 2, o_nz = 2
4183      proc1 : d_nz = 3, o_nz = 2
4184      proc2 : d_nz = 1, o_nz = 4
4185 .ve
4186    We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
4187    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4188    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4189    34 values.
4190
4191    When d_nnz, o_nnz parameters are specified, the storage is specified
4192    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4193    In the above case the values for d_nnz,o_nnz are:
4194 .vb
4195      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4196      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4197      proc2: d_nnz = [1,1] and o_nnz = [4,4]
4198 .ve
4199    Here the space allocated is the sum of all the above values, i.e., 34, and
4200    hence the preallocation is perfect.
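
   A minimal creation sketch using the d_nz/o_nz variant (the per-row counts
   2 and 2 are illustrative, not prescriptive):
.vb
     Mat A;
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,2,NULL,2,NULL,&A);CHKERRQ(ierr);
.ve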
4201 4202 Level: intermediate 4203 4204 .keywords: matrix, aij, compressed row, sparse, parallel 4205 4206 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4207 MPIAIJ, MatCreateMPIAIJWithArrays() 4208 @*/ 4209 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4210 { 4211 PetscErrorCode ierr; 4212 PetscMPIInt size; 4213 4214 PetscFunctionBegin; 4215 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4216 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4217 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4218 if (size > 1) { 4219 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4220 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4221 } else { 4222 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4223 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4224 } 4225 PetscFunctionReturn(0); 4226 } 4227 4228 #undef __FUNCT__ 4229 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4230 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4231 { 4232 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4233 4234 PetscFunctionBegin; 4235 *Ad = a->A; 4236 *Ao = a->B; 4237 *colmap = a->garray; 4238 PetscFunctionReturn(0); 4239 } 4240 4241 #undef __FUNCT__ 4242 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4243 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4244 { 4245 PetscErrorCode ierr; 4246 PetscInt i; 4247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4248 4249 PetscFunctionBegin; 4250 if (coloring->ctype == IS_COLORING_GLOBAL) { 4251 ISColoringValue *allcolors,*colors; 4252 ISColoring ocoloring; 4253 4254 /* set coloring for diagonal portion */ 4255 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4256 4257 /* set coloring for off-diagonal portion */ 4258 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4259 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4260 for (i=0; i<a->B->cmap->n; i++) { 4261 colors[i] = allcolors[a->garray[i]]; 4262 } 4263 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4264 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4265 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4266 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4267 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4268 ISColoringValue *colors; 4269 PetscInt *larray; 4270 ISColoring ocoloring; 4271 4272 /* set coloring for diagonal portion */ 4273 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4274 for (i=0; i<a->A->cmap->n; i++) { 4275 larray[i] = i + A->cmap->rstart; 4276 } 4277 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4278 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4279 for (i=0; i<a->A->cmap->n; i++) { 4280 colors[i] = coloring->colors[larray[i]]; 4281 } 4282 ierr = PetscFree(larray);CHKERRQ(ierr); 4283 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4284 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4285 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4286 4287 /* set coloring for off-diagonal portion */ 4288 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4289 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4290 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4291 for (i=0; i<a->B->cmap->n; i++) { 4292 colors[i] = coloring->colors[larray[i]]; 4293 } 4294 ierr = PetscFree(larray);CHKERRQ(ierr); 4295 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4296 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4297 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4298 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4299 PetscFunctionReturn(0); 4300 } 4301 4302 #undef __FUNCT__ 4303 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4304 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4305 { 4306 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4307 PetscErrorCode ierr; 4308 4309 PetscFunctionBegin; 4310 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4311 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4312 PetscFunctionReturn(0); 4313 } 4314 4315 #undef __FUNCT__ 4316 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4317 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4318 { 4319 PetscErrorCode ierr; 4320 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4321 PetscInt *indx; 4322 4323 PetscFunctionBegin; 4324 /* This routine will ONLY return MPIAIJ type matrix */ 4325 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4326 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4327 if (n == PETSC_DECIDE) { 4328 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4329 } 4330 /* Check sum(n) = N */ 4331 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4332 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4333 4334 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4335 rstart -= m; 4336 4337 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4338 for (i=0; i<m; i++) { 4339 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4340 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4341 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4342 } 4343 4344 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4345 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4346 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4347 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4348 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4349 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4350 PetscFunctionReturn(0); 4351 } 4352 4353 #undef __FUNCT__ 4354 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4355 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4356 { 4357 PetscErrorCode ierr; 4358 PetscInt m,N,i,rstart,nnz,Ii; 4359 PetscInt *indx; 4360 PetscScalar *values; 4361 4362 PetscFunctionBegin; 4363 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4364 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4365 for (i=0; i<m; i++) { 4366 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4367 Ii = i + rstart; 4368 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4369 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4370 } 4371 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4372   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4373   PetscFunctionReturn(0);
4374 }
4375
4376 #undef __FUNCT__
4377 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4378 /*@
4379       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating the sequential
4380                  matrices from each processor
4381
4382    Collective on MPI_Comm
4383
4384    Input Parameters:
4385 +  comm - the communicator the parallel matrix will live on
4386 .  inmat - the input sequential matrix
4387 .  n - number of local columns (or PETSC_DECIDE)
4388 -  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4389
4390    Output Parameter:
4391 .  outmat - the parallel matrix generated
4392
4393     Level: advanced
4394
4395    Notes: The number of columns of the matrix on EACH processor MUST be the same.
4396
4397 @*/
4398 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4399 {
4400   PetscErrorCode ierr;
4401   PetscMPIInt    size;
4402
4403   PetscFunctionBegin;
4404   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4405   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4406   if (size == 1) {
4407     if (scall == MAT_INITIAL_MATRIX) {
4408       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4409     } else {
4410       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4411     }
4412   } else {
4413     if (scall == MAT_INITIAL_MATRIX) {
4414       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4415     }
4416     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4417   }
4418   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4419   PetscFunctionReturn(0);
4420 }
4421
4422 #undef __FUNCT__
4423 #define __FUNCT__ "MatFileSplit"
4424 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4425 {
4426   PetscErrorCode    ierr;
4427   PetscMPIInt       rank;
4428   PetscInt          m,N,i,rstart,nnz;
4429   size_t            len;
4430   const PetscInt    *indx;
4431   PetscViewer       out;
4432   char              *name;
4433   Mat               B;
4434   const PetscScalar *values;
4435
4436   PetscFunctionBegin;
4437   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4438   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4439   /* Should this be the type of the diagonal block of A?
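     As written, B is hardwired to MATSEQAIJ just below, so any AIJ subclass
     information carried by A is not preserved in the per-process output files.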
#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc1((len+16),&name);CHKERRQ(ierr); /* room for the ".<rank>" suffix even with large ranks */
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(&merge);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++) {
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);

  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
        matrices from each process

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
.  seqmat - the input sequential matrix
.  m - number of local rows (or PETSC_DECIDE)
.  n - number of local columns (or PETSC_DECIDE)
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  mpimat - the parallel matrix generated

   Level: advanced

   Notes:
     The dimensions of the sequential matrix on each process MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
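/*
   Illustrative usage sketch (an assumption, not part of the library source): every process holds a
   full-sized SeqAIJ matrix seqA (a placeholder name) and the parallel matrix is the sum of all of
   them; the symbolic data attached to the result is reused when only the values of seqA change.

     Mat seqA,parA;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parA);CHKERRQ(ierr);
     ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&parA);CHKERRQ(ierr);
     ierr = MatDestroy(&parA);CHKERRQ(ierr);
*/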
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all of its local rows
       and putting them into a sequential matrix with mlocal rows and n columns, where mlocal is the row
       count obtained with MatGetLocalSize() and n is the global column count obtained with MatGetSize().

    Not Collective

   Input Parameters:
+  A - the matrix
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a  = (Mat_SeqAIJ*)(mpimat->A)->data;
  b  = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A, columns left of the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A, columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
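/*
   Illustrative usage sketch (not part of the library source): pull the local rows of a parallel
   matrix A into a sequential matrix, reusing the extracted matrix when only the values change.

     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     ...                                                                  /* use Aloc as an ordinary SeqAIJ matrix */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr); /* refresh the values only */
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/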
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.  A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1((nzA+nzB),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
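/*
   Illustrative usage sketch (not part of the library source): with row and col passed as NULL the
   routine condenses A to its local rows and the globally nonzero columns of the local part.

     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/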
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A

    Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+  rowb, colb - index sets of rows and columns of B to extract
-  B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1((nzA+nzB),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
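/*
   Illustrative usage sketch (not part of the library source): on first use the routine creates the
   index sets and returns them so that a later call with MAT_REUSE_MATRIX can hand them back.

     IS  rowb = NULL,colb = NULL;
     Mat Bseq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
*/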
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local part of A

    Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+  startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.  startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.  bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-  B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = NULL,*swaits = NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;   /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    k           = 0;
    for (i=0; i<nsends; i++) {
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the local send buffer, not the caller's pointer */
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
    MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

    Not Collective

   Input Parameters:
.  A - The matrix in mpiaij format

   Output Parameter:
+  lvec - The local vector holding off-process values from the argument to a matrix-vector product
.  colmap - A map from global column index to local index into lvec
-  multScatter - A scatter from the argument of a matrix-vector product to lvec

    Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
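/*
   Illustrative usage sketch (not part of the library source); the returned objects are borrowed
   references owned by the matrix, so the caller must not destroy them:

     Vec        lvec;
     VecScatter sct;
     #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
     #else
     PetscInt   *colmap;
     #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&sct);CHKERRQ(ierr);
*/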
PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                          p
        (              )       (              )         (                  )
      m (      A       )  *  n (      B       )   =   m (         C        )
        (              )       (              )         (                  )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
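/*
   Illustrative usage sketch (not part of the library source): this path is dispatched to
   automatically by MatMatMult() when A is MATMPIDENSE and B is MATMPIAIJ, via the
   "MatMatMult_mpidense_mpiaij_C" function composed in MatCreate_MPIAIJ() below; the fill
   argument is not used by this implementation.

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/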
#if defined(PETSC_HAVE_MUMPS)
PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_CLIQUE)
PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
#endif

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateAIJ()
M*/

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CLIQUE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5608 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5609 PetscFunctionReturn(0); 5610 } 5611 5612 #undef __FUNCT__ 5613 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5614 /*@ 5615 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5616 and "off-diagonal" part of the matrix in CSR format. 5617 5618 Collective on MPI_Comm 5619 5620 Input Parameters: 5621 + comm - MPI communicator 5622 . m - number of local rows (Cannot be PETSC_DECIDE) 5623 . n - This value should be the same as the local size used in creating the 5624 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5625 calculated if N is given) For square matrices n is almost always m. 5626 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5627 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5628 . i - row indices for "diagonal" portion of matrix 5629 . j - column indices 5630 . a - matrix values 5631 . oi - row indices for "off-diagonal" portion of matrix 5632 . oj - column indices 5633 - oa - matrix values 5634 5635 Output Parameter: 5636 . mat - the matrix 5637 5638 Level: advanced 5639 5640 Notes: 5641 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5642 must free the arrays once the matrix has been destroyed and not before. 5643 5644 The i and j indices are 0 based 5645 5646 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5647 5648 This sets local rows and cannot be used to set off-processor values. 5649 5650 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5651 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5652 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5653 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5654 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5655 communication if it is known that only local entries will be set. 

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
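/*
   Illustrative usage sketch (not part of the library source), assuming the caller has already
   built the split CSR arrays i/j/a (diagonal block) and oi/oj/oa (off-diagonal block) for its m
   local rows and n local columns:

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
     /* the arrays are not copied, so the caller frees i,j,a,oi,oj,oa only after the destroy */
*/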

/*
    Special version for direct calls from Fortran
*/
#include <petsc-private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a; /* refresh ba before ap2 is recomputed from it */
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}