#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petscblaslapack.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inode routines when enough inodes exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt
i,n,*garray = aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
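    The sequential matrix lives on process 0: the local row counts are gathered with
    MPI_Allgather() to establish the row ownership ranges, and process 0 then sends each
    process its row lengths, column indices and numerical values, from which the local
    diagonal and off-diagonal blocks are preallocated and filled with MatSetValues().
    With MAT_REUSE_MATRIX only the numerical values are re-sent and copied directly into
    the existing diagonal (A) and off-diagonal (B) storage.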
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr); 187 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 188 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 189 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 190 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 191 rowners[0] = 0; 192 for (i=2; i<=size; i++) { 193 rowners[i] += rowners[i-1]; 194 } 195 rstart = rowners[rank]; 196 rend = rowners[rank+1]; 197 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 198 if (!rank) { 199 gmata = (Mat_SeqAIJ*) gmat->data; 200 /* send row lengths to all processors */ 201 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 202 for (i=1; i<size; i++) { 203 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 204 } 205 /* determine number diagonal and off-diagonal counts */ 206 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 207 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 208 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 209 jj = 0; 210 for (i=0; i<m; i++) { 211 for (j=0; j<dlens[i]; j++) { 212 if (gmata->j[jj] < rstart) ld[i]++; 213 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 214 jj++; 215 } 216 } 217 /* send column indices to other processes */ 218 for (i=1; i<size; i++) { 219 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 220 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 222 } 223 224 /* send numerical values to other processes */ 225 for (i=1; i<size; i++) { 226 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 227 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 228 } 229 gmataa = gmata->a; 230 gmataj = gmata->j; 231 232 } else { 233 /* receive row lengths */ 234 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 235 /* receive column indices */ 236 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 237 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 238 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 239 /* determine number diagonal and off-diagonal counts */ 240 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 241 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 242 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 243 jj = 0; 244 for (i=0; i<m; i++) { 245 for 
(j=0; j<dlens[i]; j++) { 246 if (gmataj[jj] < rstart) ld[i]++; 247 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 248 jj++; 249 } 250 } 251 /* receive numerical values */ 252 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 253 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 254 } 255 /* set preallocation */ 256 for (i=0; i<m; i++) { 257 dlens[i] -= olens[i]; 258 } 259 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 260 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 261 262 for (i=0; i<m; i++) { 263 dlens[i] += olens[i]; 264 } 265 cnt = 0; 266 for (i=0; i<m; i++) { 267 row = rstart + i; 268 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 269 cnt += dlens[i]; 270 } 271 if (rank) { 272 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 273 } 274 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 275 ierr = PetscFree(rowners);CHKERRQ(ierr); 276 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 277 *inmat = mat; 278 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 279 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 280 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 281 mat = *inmat; 282 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 283 if (!rank) { 284 /* send numerical values to other processes */ 285 gmata = (Mat_SeqAIJ*) gmat->data; 286 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 287 gmataa = gmata->a; 288 for (i=1; i<size; i++) { 289 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 290 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 291 } 292 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 293 } else { 294 /* receive numerical values from process 0*/ 295 nz = Ad->nz + Ao->nz; 296 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 297 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 298 } 299 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 300 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 301 ad = Ad->a; 302 ao = Ao->a; 303 if (mat->rmap->n) { 304 i = 0; 305 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 306 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 307 } 308 for (i=1; i<mat->rmap->n; i++) { 309 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 310 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 311 } 312 i--; 313 if (mat->rmap->n) { 314 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 315 } 316 if (rank) { 317 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 318 } 319 } 320 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 322 CHKMEMQ; 323 PetscFunctionReturn(0); 324 } 325 326 /* 327 Local utility routine that creates a mapping from the global column 328 number to the local number in the off-diagonal part of the local 329 storage of the matrix. 
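    (This is the inverse of the garray[] mapping, which lists, for each local column of
    the off-diagonal block B, the corresponding global column number.)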
When PETSC_USE_CTABLE is used this is scalable at 330 a slightly higher hash table cost; without it it is not scalable (each processor 331 has an order N integer array but is fast to acess. 332 */ 333 #undef __FUNCT__ 334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 336 { 337 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 338 PetscErrorCode ierr; 339 PetscInt n = aij->B->cmap->n,i; 340 341 PetscFunctionBegin; 342 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 343 #if defined (PETSC_USE_CTABLE) 344 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 345 for (i=0; i<n; i++){ 346 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 347 } 348 #else 349 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 350 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 351 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 352 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 353 #endif 354 PetscFunctionReturn(0); 355 } 356 357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 358 { \ 359 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 360 lastcol1 = col;\ 361 while (high1-low1 > 5) { \ 362 t = (low1+high1)/2; \ 363 if (rp1[t] > col) high1 = t; \ 364 else low1 = t; \ 365 } \ 366 for (_i=low1; _i<high1; _i++) { \ 367 if (rp1[_i] > col) break; \ 368 if (rp1[_i] == col) { \ 369 if (addv == ADD_VALUES) ap1[_i] += value; \ 370 else ap1[_i] = value; \ 371 goto a_noinsert; \ 372 } \ 373 } \ 374 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 375 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 376 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 377 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 378 N = nrow1++ - 1; a->nz++; high1++; \ 379 /* shift up all the later entries in this row */ \ 380 for (ii=N; ii>=_i; ii--) { \ 381 rp1[ii+1] = rp1[ii]; \ 382 ap1[ii+1] = ap1[ii]; \ 383 } \ 384 rp1[_i] = col; \ 385 ap1[_i] = value; \ 386 a_noinsert: ; \ 387 ailen[row] = nrow1; \ 388 } 389 390 391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 392 { \ 393 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 394 lastcol2 = col;\ 395 while (high2-low2 > 5) { \ 396 t = (low2+high2)/2; \ 397 if (rp2[t] > col) high2 = t; \ 398 else low2 = t; \ 399 } \ 400 for (_i=low2; _i<high2; _i++) { \ 401 if (rp2[_i] > col) break; \ 402 if (rp2[_i] == col) { \ 403 if (addv == ADD_VALUES) ap2[_i] += value; \ 404 else ap2[_i] = value; \ 405 goto b_noinsert; \ 406 } \ 407 } \ 408 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 409 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 410 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 411 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 412 N = nrow2++ - 1; b->nz++; high2++; \ 413 /* shift up all the later entries in this row */ \ 414 for (ii=N; ii>=_i; ii--) { \ 415 rp2[ii+1] = rp2[ii]; \ 416 ap2[ii+1] = ap2[ii]; \ 417 } \ 418 rp2[_i] = col; \ 419 ap2[_i] = value; \ 420 b_noinsert: ; \ 421 bilen[row] = nrow2; \ 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ 
"MatSetValuesRow_MPIAIJ" 426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 427 { 428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 430 PetscErrorCode ierr; 431 PetscInt l,*garray = mat->garray,diag; 432 433 PetscFunctionBegin; 434 /* code only works for square matrices A */ 435 436 /* find size of row to the left of the diagonal part */ 437 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 438 row = row - diag; 439 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 440 if (garray[b->j[b->i[row]+l]] > diag) break; 441 } 442 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 443 444 /* diagonal part */ 445 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 446 447 /* right of diagonal part */ 448 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValues_MPIAIJ" 454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscScalar value; 458 PetscErrorCode ierr; 459 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 460 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 461 PetscBool roworiented = aij->roworiented; 462 463 /* Some Variables required in the macro */ 464 Mat A = aij->A; 465 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 466 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 467 MatScalar *aa = a->a; 468 PetscBool ignorezeroentries = a->ignorezeroentries; 469 Mat B = aij->B; 470 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 471 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 472 MatScalar *ba = b->a; 473 474 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 475 PetscInt nonew; 476 MatScalar *ap1,*ap2; 477 478 PetscFunctionBegin; 479 if (v) PetscValidScalarPointer(v,6); 480 for (i=0; i<m; i++) { 481 if (im[i] < 0) continue; 482 #if defined(PETSC_USE_DEBUG) 483 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 484 #endif 485 if (im[i] >= rstart && im[i] < rend) { 486 row = im[i] - rstart; 487 lastcol1 = -1; 488 rp1 = aj + ai[row]; 489 ap1 = aa + ai[row]; 490 rmax1 = aimax[row]; 491 nrow1 = ailen[row]; 492 low1 = 0; 493 high1 = nrow1; 494 lastcol2 = -1; 495 rp2 = bj + bi[row]; 496 ap2 = ba + bi[row]; 497 rmax2 = bimax[row]; 498 nrow2 = bilen[row]; 499 low2 = 0; 500 high2 = nrow2; 501 502 for (j=0; j<n; j++) { 503 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 504 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 505 if (in[j] >= cstart && in[j] < cend){ 506 col = in[j] - cstart; 507 nonew = a->nonew; 508 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 509 } else if (in[j] < 0) continue; 510 #if defined(PETSC_USE_DEBUG) 511 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 512 #endif 513 else { 514 if (mat->was_assembled) { 515 if (!aij->colmap) { 516 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 517 } 518 #if defined (PETSC_USE_CTABLE) 519 
ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col--; 521 #else 522 col = aij->colmap[in[j]] - 1; 523 #endif 524 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 528 B = aij->B; 529 b = (Mat_SeqAIJ*)B->data; 530 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 531 rp2 = bj + bi[row]; 532 ap2 = ba + bi[row]; 533 rmax2 = bimax[row]; 534 nrow2 = bilen[row]; 535 low2 = 0; 536 high2 = nrow2; 537 bm = aij->B->rmap->n; 538 ba = b->a; 539 } 540 } else col = in[j]; 541 nonew = b->nonew; 542 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 543 } 544 } 545 } else { 546 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 547 if (!aij->donotstash) { 548 mat->assembled = PETSC_FALSE; 549 if (roworiented) { 550 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 551 } else { 552 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 553 } 554 } 555 } 556 } 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatGetValues_MPIAIJ" 562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 563 { 564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 565 PetscErrorCode ierr; 566 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 567 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 568 569 PetscFunctionBegin; 570 for (i=0; i<m; i++) { 571 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 572 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 573 if (idxm[i] >= rstart && idxm[i] < rend) { 574 row = idxm[i] - rstart; 575 for (j=0; j<n; j++) { 576 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 577 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 578 if (idxn[j] >= cstart && idxn[j] < cend){ 579 col = idxn[j] - cstart; 580 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } else { 582 if (!aij->colmap) { 583 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 584 } 585 #if defined (PETSC_USE_CTABLE) 586 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 587 col --; 588 #else 589 col = aij->colmap[idxn[j]] - 1; 590 #endif 591 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 592 else { 593 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 594 } 595 } 596 } 597 } else { 598 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 599 } 600 } 601 PetscFunctionReturn(0); 602 } 603 604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 609 { 610 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 611 PetscErrorCode ierr; 612 PetscInt nstash,reallocs; 613 InsertMode addv; 614 615 
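  /* Begin parallel assembly: check that all processes used a consistent InsertMode and
     start communicating any stashed entries destined for rows owned by other processes;
     the received entries are inserted in MatAssemblyEnd_MPIAIJ(). */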
  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 669 /* 670 if nonzero structure of submatrix B cannot change then we know that 671 no processor disassembled thus we can skip this stuff 672 */ 673 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 674 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 675 if (mat->was_assembled && !other_disassembled) { 676 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 677 } 678 } 679 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 680 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 681 } 682 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 683 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 684 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 685 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 686 687 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 688 aij->rowvalues = 0; 689 690 /* used by MatAXPY() */ 691 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 692 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 693 694 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 695 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 702 { 703 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 704 PetscErrorCode ierr; 705 706 PetscFunctionBegin; 707 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 708 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatZeroRows_MPIAIJ" 714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 715 { 716 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 717 PetscErrorCode ierr; 718 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 719 PetscInt i,*owners = A->rmap->range; 720 PetscInt *nprocs,j,idx,nsends,row; 721 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 722 PetscInt *rvalues,count,base,slen,*source; 723 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 724 MPI_Comm comm = ((PetscObject)A)->comm; 725 MPI_Request *send_waits,*recv_waits; 726 MPI_Status recv_status,*send_status; 727 const PetscScalar *xx; 728 PetscScalar *bb; 729 #if defined(PETSC_DEBUG) 730 PetscBool found = PETSC_FALSE; 731 #endif 732 733 PetscFunctionBegin; 734 /* first count number of contributors to each processor */ 735 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 736 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 737 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 738 j = 0; 739 for (i=0; i<N; i++) { 740 if (lastidx > (idx = rows[i])) j = 0; 741 lastidx = idx; 742 for (; j<size; j++) { 743 if (idx >= owners[j] && idx < owners[j+1]) { 744 nprocs[2*j]++; 745 nprocs[2*j+1] = 1; 746 owner[i] = j; 747 #if defined(PETSC_DEBUG) 748 found = PETSC_TRUE; 749 #endif 750 break; 751 } 752 } 753 #if defined(PETSC_DEBUG) 754 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 755 found = PETSC_FALSE; 756 #endif 757 } 758 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 759 760 if (A->nooffproczerorows) { 761 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 762 nrecvs = nsends; 763 nmax = N; 764 } 
else { 765 /* inform other processors of number of messages and max length*/ 766 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 767 } 768 769 /* post receives: */ 770 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 771 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 772 for (i=0; i<nrecvs; i++) { 773 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 774 } 775 776 /* do sends: 777 1) starts[i] gives the starting index in svalues for stuff going to 778 the ith processor 779 */ 780 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 781 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 782 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 783 starts[0] = 0; 784 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 785 for (i=0; i<N; i++) { 786 svalues[starts[owner[i]]++] = rows[i]; 787 } 788 789 starts[0] = 0; 790 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 791 count = 0; 792 for (i=0; i<size; i++) { 793 if (nprocs[2*i+1]) { 794 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 795 } 796 } 797 ierr = PetscFree(starts);CHKERRQ(ierr); 798 799 base = owners[rank]; 800 801 /* wait on receives */ 802 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 803 count = nrecvs; slen = 0; 804 while (count) { 805 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 806 /* unpack receives into our local space */ 807 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 808 source[imdex] = recv_status.MPI_SOURCE; 809 lens[imdex] = n; 810 slen += n; 811 count--; 812 } 813 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 814 815 /* move the data into the send scatter */ 816 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 817 count = 0; 818 for (i=0; i<nrecvs; i++) { 819 values = rvalues + i*nmax; 820 for (j=0; j<lens[i]; j++) { 821 lrows[count++] = values[j] - base; 822 } 823 } 824 ierr = PetscFree(rvalues);CHKERRQ(ierr); 825 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 826 ierr = PetscFree(owner);CHKERRQ(ierr); 827 ierr = PetscFree(nprocs);CHKERRQ(ierr); 828 829 /* fix right hand side if needed */ 830 if (x && b) { 831 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 833 for (i=0; i<slen; i++) { 834 bb[lrows[i]] = diag*xx[lrows[i]]; 835 } 836 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 837 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 838 } 839 /* 840 Zero the required rows. If the "diagonal block" of the matrix 841 is square and the user wishes to set the diagonal we use separate 842 code so that MatSetValues() is not called for each diagonal allocating 843 new memory, thus calling lots of mallocs and slowing things down. 
844 845 */ 846 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 847 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 848 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 849 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 850 } else if (diag != 0.0) { 851 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 852 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 853 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 855 } 856 for (i = 0; i < slen; i++) { 857 row = lrows[i] + rstart; 858 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 859 } 860 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 861 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 862 } else { 863 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 864 } 865 ierr = PetscFree(lrows);CHKERRQ(ierr); 866 867 /* wait on sends */ 868 if (nsends) { 869 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 870 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 871 ierr = PetscFree(send_status);CHKERRQ(ierr); 872 } 873 ierr = PetscFree(send_waits);CHKERRQ(ierr); 874 ierr = PetscFree(svalues);CHKERRQ(ierr); 875 PetscFunctionReturn(0); 876 } 877 878 #undef __FUNCT__ 879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 881 { 882 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 883 PetscErrorCode ierr; 884 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 885 PetscInt i,*owners = A->rmap->range; 886 PetscInt *nprocs,j,idx,nsends; 887 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 888 PetscInt *rvalues,count,base,slen,*source; 889 PetscInt *lens,*lrows,*values,m; 890 MPI_Comm comm = ((PetscObject)A)->comm; 891 MPI_Request *send_waits,*recv_waits; 892 MPI_Status recv_status,*send_status; 893 const PetscScalar *xx; 894 PetscScalar *bb,*mask; 895 Vec xmask,lmask; 896 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 897 const PetscInt *aj, *ii,*ridx; 898 PetscScalar *aa; 899 #if defined(PETSC_DEBUG) 900 PetscBool found = PETSC_FALSE; 901 #endif 902 903 PetscFunctionBegin; 904 /* first count number of contributors to each processor */ 905 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 906 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 907 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 908 j = 0; 909 for (i=0; i<N; i++) { 910 if (lastidx > (idx = rows[i])) j = 0; 911 lastidx = idx; 912 for (; j<size; j++) { 913 if (idx >= owners[j] && idx < owners[j+1]) { 914 nprocs[2*j]++; 915 nprocs[2*j+1] = 1; 916 owner[i] = j; 917 #if defined(PETSC_DEBUG) 918 found = PETSC_TRUE; 919 #endif 920 break; 921 } 922 } 923 #if defined(PETSC_DEBUG) 924 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 925 found = PETSC_FALSE; 926 #endif 927 } 928 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 929 930 /* inform other processors of number of messages and max length*/ 931 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 932 933 /* post receives: */ 934 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 935 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 936 for (i=0; i<nrecvs; i++) { 937 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 938 } 939 940 /* do sends: 941 1) starts[i] gives the starting index in svalues for stuff going to 942 the ith processor 943 */ 944 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 945 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 946 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 947 starts[0] = 0; 948 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 949 for (i=0; i<N; i++) { 950 svalues[starts[owner[i]]++] = rows[i]; 951 } 952 953 starts[0] = 0; 954 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 955 count = 0; 956 for (i=0; i<size; i++) { 957 if (nprocs[2*i+1]) { 958 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 959 } 960 } 961 ierr = PetscFree(starts);CHKERRQ(ierr); 962 963 base = owners[rank]; 964 965 /* wait on receives */ 966 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 967 count = nrecvs; slen = 0; 968 while (count) { 969 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 970 /* unpack receives into our local space */ 971 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 972 source[imdex] = recv_status.MPI_SOURCE; 973 lens[imdex] = n; 974 slen += n; 975 count--; 976 } 977 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 978 979 /* move the data into the send scatter */ 980 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 981 count = 0; 982 for (i=0; i<nrecvs; i++) { 983 values = rvalues + i*nmax; 984 for (j=0; j<lens[i]; j++) { 985 lrows[count++] = values[j] - base; 986 } 987 } 988 ierr = PetscFree(rvalues);CHKERRQ(ierr); 989 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 990 ierr = PetscFree(owner);CHKERRQ(ierr); 991 ierr = PetscFree(nprocs);CHKERRQ(ierr); 992 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 993 994 /* zero diagonal part of matrix */ 995 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 996 997 /* handle off diagonal part of matrix */ 998 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 999 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1000 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1001 for (i=0; i<slen; i++) { 1002 bb[lrows[i]] = 1; 1003 } 1004 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1005 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1008 if (x) { 1009 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1012 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1013 } 1014 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1015 1016 /* remove zeroed rows of off diagonal matrix */ 1017 ii = aij->i; 1018 for (i=0; i<slen; i++) { 1019 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1020 } 1021 1022 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1023 if (aij->compressedrow.use){ 1024 m = aij->compressedrow.nrows; 1025 ii = 
aij->compressedrow.i; 1026 ridx = aij->compressedrow.rindex; 1027 for (i=0; i<m; i++){ 1028 n = ii[i+1] - ii[i]; 1029 aj = aij->j + ii[i]; 1030 aa = aij->a + ii[i]; 1031 1032 for (j=0; j<n; j++) { 1033 if (PetscAbsScalar(mask[*aj])) { 1034 if (b) bb[*ridx] -= *aa*xx[*aj]; 1035 *aa = 0.0; 1036 } 1037 aa++; 1038 aj++; 1039 } 1040 ridx++; 1041 } 1042 } else { /* do not use compressed row format */ 1043 m = l->B->rmap->n; 1044 for (i=0; i<m; i++) { 1045 n = ii[i+1] - ii[i]; 1046 aj = aij->j + ii[i]; 1047 aa = aij->a + ii[i]; 1048 for (j=0; j<n; j++) { 1049 if (PetscAbsScalar(mask[*aj])) { 1050 if (b) bb[i] -= *aa*xx[*aj]; 1051 *aa = 0.0; 1052 } 1053 aa++; 1054 aj++; 1055 } 1056 } 1057 } 1058 if (x) { 1059 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1060 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1061 } 1062 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1063 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1064 ierr = PetscFree(lrows);CHKERRQ(ierr); 1065 1066 /* wait on sends */ 1067 if (nsends) { 1068 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1069 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1070 ierr = PetscFree(send_status);CHKERRQ(ierr); 1071 } 1072 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1073 ierr = PetscFree(svalues);CHKERRQ(ierr); 1074 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatMult_MPIAIJ" 1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1081 { 1082 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1083 PetscErrorCode ierr; 1084 PetscInt nt; 1085 1086 PetscFunctionBegin; 1087 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1088 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1089 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1091 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1092 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1099 { 1100 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1111 { 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1118 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 PetscBool merged; 1130 1131 PetscFunctionBegin; 1132 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1133 /* do nondiagonal part */ 1134 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1135 if (!merged) { 1136 /* 
send it on its way */ 1137 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 /* do local part */ 1139 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1140 /* receive remote parts: note this assumes the values are not actually */ 1141 /* added in yy until the next line, */ 1142 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 } else { 1144 /* do local part */ 1145 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1146 /* send it on its way */ 1147 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1148 /* values actually were received in the Begin() but we need to call this nop */ 1149 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1150 } 1151 PetscFunctionReturn(0); 1152 } 1153 1154 EXTERN_C_BEGIN 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1157 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1158 { 1159 MPI_Comm comm; 1160 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1161 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1162 IS Me,Notme; 1163 PetscErrorCode ierr; 1164 PetscInt M,N,first,last,*notme,i; 1165 PetscMPIInt size; 1166 1167 PetscFunctionBegin; 1168 1169 /* Easy test: symmetric diagonal block */ 1170 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1171 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1172 if (!*f) PetscFunctionReturn(0); 1173 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1174 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1175 if (size == 1) PetscFunctionReturn(0); 1176 1177 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1178 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1179 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1180 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1181 for (i=0; i<first; i++) notme[i] = i; 1182 for (i=last; i<M; i++) notme[i-last+first] = i; 1183 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1184 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1185 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1186 Aoff = Aoffs[0]; 1187 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1188 Boff = Boffs[0]; 1189 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1190 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1191 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1192 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1193 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1194 ierr = PetscFree(notme);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 EXTERN_C_END 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1202 { 1203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 /* do nondiagonal part */ 1208 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1209 /* send it on its way */ 1210 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1211 /* do local part */ 1212 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1213 /* receive remote parts */ 1214 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 
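/*
    The multiply kernels above overlap communication with computation: MatMult() begins the
    scatter of the needed off-process entries of xx into a->lvec, applies the local diagonal
    block A while the messages are in flight, and then adds B*lvec once the scatter completes;
    MatMultTranspose() applies B^T locally and reverse-scatters that result into yy with
    ADD_VALUES, overlapping the scatter with the local A^T product when the scatter contexts
    are not merged.  A typical user-level sequence exercising these kernels is, as a sketch
    only (placeholder names, error checking omitted):

       MatCreateAIJ(comm,m,m,M,M,5,PETSC_NULL,2,PETSC_NULL,&A);
       ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
       MatMult(A,x,y);
       MatMultTransposeAdd(A,x,y,z);
*/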
1217 1218 /* 1219 This only works correctly for square matrices where the subblock A->A is the 1220 diagonal block 1221 */ 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1225 { 1226 PetscErrorCode ierr; 1227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1228 1229 PetscFunctionBegin; 1230 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1231 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1232 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatScale_MPIAIJ" 1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1239 { 1240 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1245 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatDestroy_MPIAIJ" 1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1252 { 1253 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 #if defined(PETSC_USE_LOG) 1258 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1259 #endif 1260 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1261 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1262 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1263 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1264 #if defined (PETSC_USE_CTABLE) 1265 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1266 #else 1267 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1268 #endif 1269 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1270 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1271 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1272 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1273 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1274 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1275 1276 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1291 { 1292 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1293 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1294 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1295 PetscErrorCode ierr; 1296 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1297 int 
fd; 1298 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1299 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1300 PetscScalar *column_values; 1301 PetscInt message_count,flowcontrolcount; 1302 1303 PetscFunctionBegin; 1304 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1305 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1306 nz = A->nz + B->nz; 1307 if (!rank) { 1308 header[0] = MAT_FILE_CLASSID; 1309 header[1] = mat->rmap->N; 1310 header[2] = mat->cmap->N; 1311 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1312 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1313 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1314 /* get largest number of rows any processor has */ 1315 rlen = mat->rmap->n; 1316 range = mat->rmap->range; 1317 for (i=1; i<size; i++) { 1318 rlen = PetscMax(rlen,range[i+1] - range[i]); 1319 } 1320 } else { 1321 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1322 rlen = mat->rmap->n; 1323 } 1324 1325 /* load up the local row counts */ 1326 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1327 for (i=0; i<mat->rmap->n; i++) { 1328 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1329 } 1330 1331 /* store the row lengths to the file */ 1332 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1333 if (!rank) { 1334 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 for (i=1; i<size; i++) { 1336 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1337 rlen = range[i+1] - range[i]; 1338 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1339 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1340 } 1341 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1342 } else { 1343 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1344 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1345 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1346 } 1347 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1348 1349 /* load up the local column indices */ 1350 nzmax = nz; /* )th processor needs space a largest processor needs */ 1351 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1352 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1353 cnt = 0; 1354 for (i=0; i<mat->rmap->n; i++) { 1355 for (j=B->i[i]; j<B->i[i+1]; j++) { 1356 if ( (col = garray[B->j[j]]) > cstart) break; 1357 column_indices[cnt++] = col; 1358 } 1359 for (k=A->i[i]; k<A->i[i+1]; k++) { 1360 column_indices[cnt++] = A->j[k] + cstart; 1361 } 1362 for (; j<B->i[i+1]; j++) { 1363 column_indices[cnt++] = garray[B->j[j]]; 1364 } 1365 } 1366 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1367 1368 /* store the column indices to the file */ 1369 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1370 if (!rank) { 1371 MPI_Status status; 1372 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1373 for (i=1; i<size; i++) { 1374 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1375 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1376 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1377 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1378 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1379 } 1380 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1381 } else { 1382 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1383 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1384 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1385 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1388 1389 /* load up the local column values */ 1390 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1391 cnt = 0; 1392 for (i=0; i<mat->rmap->n; i++) { 1393 for (j=B->i[i]; j<B->i[i+1]; j++) { 1394 if ( garray[B->j[j]] > cstart) break; 1395 column_values[cnt++] = B->a[j]; 1396 } 1397 for (k=A->i[i]; k<A->i[i+1]; k++) { 1398 column_values[cnt++] = A->a[k]; 1399 } 1400 for (; j<B->i[i+1]; j++) { 1401 column_values[cnt++] = B->a[j]; 1402 } 1403 } 1404 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1405 1406 /* store the column values to the file */ 1407 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1408 if (!rank) { 1409 MPI_Status status; 1410 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1411 for (i=1; i<size; i++) { 1412 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1413 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1414 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1415 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1416 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1417 } 1418 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1419 } else { 1420 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1421 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1422 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1423 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1424 } 1425 ierr = PetscFree(column_values);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1431 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1432 { 1433 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1434 PetscErrorCode ierr; 1435 PetscMPIInt rank = aij->rank,size = aij->size; 1436 PetscBool isdraw,iascii,isbinary; 1437 PetscViewer sviewer; 1438 PetscViewerFormat format; 1439 1440 PetscFunctionBegin; 1441 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1442 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1443 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1444 if (iascii) { 1445 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1446 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1447 MatInfo info; 1448 PetscBool inodes; 1449 1450 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1451 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1452 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1453 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1454 if (!inodes) { 1455 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1456 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1457 } else { 1458 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1459 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1460 } 1461 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1462 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1463 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1464 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1465 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1467 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1468 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1471 PetscInt inodecount,inodelimit,*inodes; 1472 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1473 if (inodes) { 1474 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1475 } else { 1476 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1477 } 1478 PetscFunctionReturn(0); 1479 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1480 PetscFunctionReturn(0); 1481 } 1482 } else if (isbinary) { 1483 if (size == 1) { 1484 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1485 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1486 } else { 1487 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1488 } 1489 PetscFunctionReturn(0); 1490 } else if (isdraw) { 1491 PetscDraw draw; 1492 PetscBool isnull; 1493 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1494 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1495 } 1496 1497 if (size == 1) { 1498 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1499 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1500 } else { 1501 /* assemble the entire matrix onto first processor. 
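         This is done by inserting every local row, using global column numbers, into a
         temporary MATMPIAIJ matrix whose rows are all owned by process 0; that matrix's
         sequential diagonal block is then viewed through the singleton viewer.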
*/ 1502 Mat A; 1503 Mat_SeqAIJ *Aloc; 1504 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1505 MatScalar *a; 1506 1507 if (mat->rmap->N > 1024) { 1508 PetscBool flg = PETSC_FALSE; 1509 1510 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1511 if (!flg) { 1512 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1513 } 1514 } 1515 1516 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1517 if (!rank) { 1518 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1519 } else { 1520 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1521 } 1522 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1523 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1524 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1525 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1526 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1527 1528 /* copy over the A part */ 1529 Aloc = (Mat_SeqAIJ*)aij->A->data; 1530 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1531 row = mat->rmap->rstart; 1532 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1533 for (i=0; i<m; i++) { 1534 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1535 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1536 } 1537 aj = Aloc->j; 1538 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1539 1540 /* copy over the B part */ 1541 Aloc = (Mat_SeqAIJ*)aij->B->data; 1542 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1543 row = mat->rmap->rstart; 1544 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1545 ct = cols; 1546 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1547 for (i=0; i<m; i++) { 1548 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1549 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1550 } 1551 ierr = PetscFree(ct);CHKERRQ(ierr); 1552 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1553 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1554 /* 1555 Everyone has to call to draw the matrix since the graphics waits are 1556 synchronized across all processors that share the PetscDraw object 1557 */ 1558 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1559 if (!rank) { 1560 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1561 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1562 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1563 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1564 } 1565 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1566 ierr = MatDestroy(&A);CHKERRQ(ierr); 1567 } 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatView_MPIAIJ" 1573 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1574 { 1575 PetscErrorCode ierr; 1576 PetscBool iascii,isdraw,issocket,isbinary; 1577 1578 PetscFunctionBegin; 1579 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1580 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1581 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1582 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1583 if (iascii || isdraw || isbinary || issocket) { 1584 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1585 } else { 1586 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1587 } 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "MatSOR_MPIAIJ" 1593 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1594 { 1595 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1596 PetscErrorCode ierr; 1597 Vec bb1 = 0; 1598 PetscBool hasop; 1599 1600 PetscFunctionBegin; 1601 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1602 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1603 } 1604 1605 if (flag == SOR_APPLY_UPPER) { 1606 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1607 PetscFunctionReturn(0); 1608 } 1609 1610 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1611 if (flag & SOR_ZERO_INITIAL_GUESS) { 1612 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1613 its--; 1614 } 1615 1616 while (its--) { 1617 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1619 1620 /* update rhs: bb1 = bb - B*x */ 1621 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1622 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1623 1624 /* local sweep */ 1625 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1626 } 1627 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1628 if (flag & SOR_ZERO_INITIAL_GUESS) { 1629 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1630 its--; 1631 } 1632 while (its--) { 1633 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1634 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1635 1636 /* update rhs: bb1 = bb - B*x */ 1637 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1638 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1639 1640 /* local sweep */ 1641 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1642 } 1643 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1644 if (flag & SOR_ZERO_INITIAL_GUESS) { 1645 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1646 its--; 1647 } 1648 while (its--) { 1649 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1650 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1651 1652 /* update rhs: bb1 = bb - B*x */ 1653 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1654 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1655 1656 /* local sweep */ 1657 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1658 } 1659 } else if (flag & SOR_EISENSTAT) { 1660 Vec xx1; 1661 1662 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1663 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1664 1665 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1666 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1667 if (!mat->diag) { 1668 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1669 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1670 } 1671 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1672 if (hasop) { 1673 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1674 } else { 1675 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1676 } 1677 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1678 1679 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1680 1681 /* local sweep */ 1682 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1683 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1684 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1685 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1686 1687 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatPermute_MPIAIJ" 1693 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1694 { 1695 MPI_Comm comm; 1696 PetscInt first,local_rowsize,local_colsize; 1697 const PetscInt *rows; 1698 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1699 PetscErrorCode ierr; 1700 1701 PetscFunctionBegin; 1702 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1703 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1704 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1705 /* collect the global row permutation and invert it */ 1706 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1707 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1708 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1709 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1710 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1711 /* get the local target indices */ 1712 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1713 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1714 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1715 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1716 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1717 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1718 /* the column permutation is so much easier; 1719 make a local version of 'colp' and invert it */ 1720 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1721 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1722 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1723 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1724 /* now we just get the submatrix */ 1725 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1726 /* clean up */ 1727 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1728 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1729 PetscFunctionReturn(0); 1730 } 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1734 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1735 { 1736 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1737 Mat A = mat->A,B = 
mat->B; 1738 PetscErrorCode ierr; 1739 PetscReal isend[5],irecv[5]; 1740 1741 PetscFunctionBegin; 1742 info->block_size = 1.0; 1743 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1744 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1745 isend[3] = info->memory; isend[4] = info->mallocs; 1746 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1747 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1748 isend[3] += info->memory; isend[4] += info->mallocs; 1749 if (flag == MAT_LOCAL) { 1750 info->nz_used = isend[0]; 1751 info->nz_allocated = isend[1]; 1752 info->nz_unneeded = isend[2]; 1753 info->memory = isend[3]; 1754 info->mallocs = isend[4]; 1755 } else if (flag == MAT_GLOBAL_MAX) { 1756 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1757 info->nz_used = irecv[0]; 1758 info->nz_allocated = irecv[1]; 1759 info->nz_unneeded = irecv[2]; 1760 info->memory = irecv[3]; 1761 info->mallocs = irecv[4]; 1762 } else if (flag == MAT_GLOBAL_SUM) { 1763 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1764 info->nz_used = irecv[0]; 1765 info->nz_allocated = irecv[1]; 1766 info->nz_unneeded = irecv[2]; 1767 info->memory = irecv[3]; 1768 info->mallocs = irecv[4]; 1769 } 1770 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1771 info->fill_ratio_needed = 0; 1772 info->factor_mallocs = 0; 1773 1774 PetscFunctionReturn(0); 1775 } 1776 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatSetOption_MPIAIJ" 1779 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1780 { 1781 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1782 PetscErrorCode ierr; 1783 1784 PetscFunctionBegin; 1785 switch (op) { 1786 case MAT_NEW_NONZERO_LOCATIONS: 1787 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1788 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1789 case MAT_KEEP_NONZERO_PATTERN: 1790 case MAT_NEW_NONZERO_LOCATION_ERR: 1791 case MAT_USE_INODES: 1792 case MAT_IGNORE_ZERO_ENTRIES: 1793 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1794 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1795 break; 1796 case MAT_ROW_ORIENTED: 1797 a->roworiented = flg; 1798 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1799 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1800 break; 1801 case MAT_NEW_DIAGONALS: 1802 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1803 break; 1804 case MAT_IGNORE_OFF_PROC_ENTRIES: 1805 a->donotstash = flg; 1806 break; 1807 case MAT_SPD: 1808 A->spd_set = PETSC_TRUE; 1809 A->spd = flg; 1810 if (flg) { 1811 A->symmetric = PETSC_TRUE; 1812 A->structurally_symmetric = PETSC_TRUE; 1813 A->symmetric_set = PETSC_TRUE; 1814 A->structurally_symmetric_set = PETSC_TRUE; 1815 } 1816 break; 1817 case MAT_SYMMETRIC: 1818 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1819 break; 1820 case MAT_STRUCTURALLY_SYMMETRIC: 1821 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1822 break; 1823 case MAT_HERMITIAN: 1824 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1825 break; 1826 case MAT_SYMMETRY_ETERNAL: 1827 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1828 break; 1829 #if defined(PETSC_HAVE_TXPETSCGPU) 1830 // Sorry, I don't know how else to add MatOptions for data specific 1831 // to GPU classes without protecting against it in this case statement. 1832 // If anyone knows of a better way, let me know. 
Paul 1833 case MAT_DIAGBLOCK_CSR: 1834 case MAT_OFFDIAGBLOCK_CSR: 1835 case MAT_CSR: 1836 case MAT_DIAGBLOCK_DIA: 1837 case MAT_OFFDIAGBLOCK_DIA: 1838 case MAT_DIA: 1839 case MAT_DIAGBLOCK_ELL: 1840 case MAT_OFFDIAGBLOCK_ELL: 1841 case MAT_ELL: 1842 case MAT_DIAGBLOCK_HYB: 1843 case MAT_OFFDIAGBLOCK_HYB: 1844 case MAT_HYB: 1845 /* Not an error because MatSetOption_MPIAIJCUSP/CUSPARSE handle these options */ 1846 break; 1847 #endif 1848 default: 1849 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1850 } 1851 PetscFunctionReturn(0); 1852 } 1853 1854 #undef __FUNCT__ 1855 #define __FUNCT__ "MatGetRow_MPIAIJ" 1856 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1857 { 1858 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1859 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1860 PetscErrorCode ierr; 1861 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1862 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1863 PetscInt *cmap,*idx_p; 1864 1865 PetscFunctionBegin; 1866 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1867 mat->getrowactive = PETSC_TRUE; 1868 1869 if (!mat->rowvalues && (idx || v)) { 1870 /* 1871 allocate enough space to hold information from the longest row. 1872 */ 1873 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1874 PetscInt max = 1,tmp; 1875 for (i=0; i<matin->rmap->n; i++) { 1876 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1877 if (max < tmp) { max = tmp; } 1878 } 1879 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1880 } 1881 1882 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1883 lrow = row - rstart; 1884 1885 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1886 if (!v) {pvA = 0; pvB = 0;} 1887 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1888 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1889 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1890 nztot = nzA + nzB; 1891 1892 cmap = mat->garray; 1893 if (v || idx) { 1894 if (nztot) { 1895 /* Sort by increasing column numbers, assuming A and B already sorted */ 1896 PetscInt imark = -1; 1897 if (v) { 1898 *v = v_p = mat->rowvalues; 1899 for (i=0; i<nzB; i++) { 1900 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1901 else break; 1902 } 1903 imark = i; 1904 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1905 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1906 } 1907 if (idx) { 1908 *idx = idx_p = mat->rowindices; 1909 if (imark > -1) { 1910 for (i=0; i<imark; i++) { 1911 idx_p[i] = cmap[cworkB[i]]; 1912 } 1913 } else { 1914 for (i=0; i<nzB; i++) { 1915 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1916 else break; 1917 } 1918 imark = i; 1919 } 1920 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1921 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1922 } 1923 } else { 1924 if (idx) *idx = 0; 1925 if (v) *v = 0; 1926 } 1927 } 1928 *nz = nztot; 1929 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1930 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1931 PetscFunctionReturn(0); 1932 } 1933 1934 #undef __FUNCT__ 1935 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1936 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1937 { 1938 Mat_MPIAIJ 
*aij = (Mat_MPIAIJ*)mat->data; 1939 1940 PetscFunctionBegin; 1941 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1942 aij->getrowactive = PETSC_FALSE; 1943 PetscFunctionReturn(0); 1944 } 1945 1946 #undef __FUNCT__ 1947 #define __FUNCT__ "MatNorm_MPIAIJ" 1948 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1949 { 1950 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1951 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1952 PetscErrorCode ierr; 1953 PetscInt i,j,cstart = mat->cmap->rstart; 1954 PetscReal sum = 0.0; 1955 MatScalar *v; 1956 1957 PetscFunctionBegin; 1958 if (aij->size == 1) { 1959 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1960 } else { 1961 if (type == NORM_FROBENIUS) { 1962 v = amat->a; 1963 for (i=0; i<amat->nz; i++) { 1964 #if defined(PETSC_USE_COMPLEX) 1965 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1966 #else 1967 sum += (*v)*(*v); v++; 1968 #endif 1969 } 1970 v = bmat->a; 1971 for (i=0; i<bmat->nz; i++) { 1972 #if defined(PETSC_USE_COMPLEX) 1973 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1974 #else 1975 sum += (*v)*(*v); v++; 1976 #endif 1977 } 1978 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1979 *norm = PetscSqrtReal(*norm); 1980 } else if (type == NORM_1) { /* max column norm */ 1981 PetscReal *tmp,*tmp2; 1982 PetscInt *jj,*garray = aij->garray; 1983 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1984 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1985 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1986 *norm = 0.0; 1987 v = amat->a; jj = amat->j; 1988 for (j=0; j<amat->nz; j++) { 1989 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1990 } 1991 v = bmat->a; jj = bmat->j; 1992 for (j=0; j<bmat->nz; j++) { 1993 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1994 } 1995 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1996 for (j=0; j<mat->cmap->N; j++) { 1997 if (tmp2[j] > *norm) *norm = tmp2[j]; 1998 } 1999 ierr = PetscFree(tmp);CHKERRQ(ierr); 2000 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2001 } else if (type == NORM_INFINITY) { /* max row norm */ 2002 PetscReal ntemp = 0.0; 2003 for (j=0; j<aij->A->rmap->n; j++) { 2004 v = amat->a + amat->i[j]; 2005 sum = 0.0; 2006 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2007 sum += PetscAbsScalar(*v); v++; 2008 } 2009 v = bmat->a + bmat->i[j]; 2010 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2011 sum += PetscAbsScalar(*v); v++; 2012 } 2013 if (sum > ntemp) ntemp = sum; 2014 } 2015 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2016 } else { 2017 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2018 } 2019 } 2020 PetscFunctionReturn(0); 2021 } 2022 2023 #undef __FUNCT__ 2024 #define __FUNCT__ "MatTranspose_MPIAIJ" 2025 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2026 { 2027 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2028 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2029 PetscErrorCode ierr; 2030 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2031 PetscInt cstart=A->cmap->rstart,ncol; 2032 Mat B; 2033 MatScalar *array; 2034 2035 PetscFunctionBegin; 2036 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square 
matrix only for in-place"); 2037 2038 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2039 ai = Aloc->i; aj = Aloc->j; 2040 bi = Bloc->i; bj = Bloc->j; 2041 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2042 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2043 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2044 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2045 for (i=0; i<ai[ma]; i++){ 2046 d_nnz[aj[i]] ++; 2047 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2048 } 2049 2050 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2051 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2052 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2053 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2054 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2055 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2056 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2057 } else { 2058 B = *matout; 2059 } 2060 2061 /* copy over the A part */ 2062 array = Aloc->a; 2063 row = A->rmap->rstart; 2064 for (i=0; i<ma; i++) { 2065 ncol = ai[i+1]-ai[i]; 2066 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2067 row++; array += ncol; aj += ncol; 2068 } 2069 aj = Aloc->j; 2070 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2071 2072 /* copy over the B part */ 2073 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2074 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2075 array = Bloc->a; 2076 row = A->rmap->rstart; 2077 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2078 cols_tmp = cols; 2079 for (i=0; i<mb; i++) { 2080 ncol = bi[i+1]-bi[i]; 2081 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2082 row++; array += ncol; cols_tmp += ncol; 2083 } 2084 ierr = PetscFree(cols);CHKERRQ(ierr); 2085 2086 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2087 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2088 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2089 *matout = B; 2090 } else { 2091 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2092 } 2093 PetscFunctionReturn(0); 2094 } 2095 2096 #undef __FUNCT__ 2097 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2098 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2099 { 2100 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2101 Mat a = aij->A,b = aij->B; 2102 PetscErrorCode ierr; 2103 PetscInt s1,s2,s3; 2104 2105 PetscFunctionBegin; 2106 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2107 if (rr) { 2108 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2109 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2110 /* Overlap communication with computation. 
*/ 2111 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2112 } 2113 if (ll) { 2114 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2115 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2116 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2117 } 2118 /* scale the diagonal block */ 2119 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2120 2121 if (rr) { 2122 /* Do a scatter end and then right scale the off-diagonal block */ 2123 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2124 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2125 } 2126 2127 PetscFunctionReturn(0); 2128 } 2129 2130 #undef __FUNCT__ 2131 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2132 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2133 { 2134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2135 PetscErrorCode ierr; 2136 2137 PetscFunctionBegin; 2138 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2139 PetscFunctionReturn(0); 2140 } 2141 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "MatEqual_MPIAIJ" 2144 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2145 { 2146 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2147 Mat a,b,c,d; 2148 PetscBool flg; 2149 PetscErrorCode ierr; 2150 2151 PetscFunctionBegin; 2152 a = matA->A; b = matA->B; 2153 c = matB->A; d = matB->B; 2154 2155 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2156 if (flg) { 2157 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2158 } 2159 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2160 PetscFunctionReturn(0); 2161 } 2162 2163 #undef __FUNCT__ 2164 #define __FUNCT__ "MatCopy_MPIAIJ" 2165 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2166 { 2167 PetscErrorCode ierr; 2168 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2169 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2170 2171 PetscFunctionBegin; 2172 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2173 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2174 /* because of the column compression in the off-processor part of the matrix a->B, 2175 the number of columns in a->B and b->B may be different, hence we cannot call 2176 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2177 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2178 then copying the submatrices */ 2179 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2180 } else { 2181 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2182 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2183 } 2184 PetscFunctionReturn(0); 2185 } 2186 2187 #undef __FUNCT__ 2188 #define __FUNCT__ "MatSetUp_MPIAIJ" 2189 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2190 { 2191 PetscErrorCode ierr; 2192 2193 PetscFunctionBegin; 2194 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2195 PetscFunctionReturn(0); 2196 } 2197 2198 #undef __FUNCT__ 2199 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2200 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2201 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2202 { 2203 PetscInt i,m=Y->rmap->N; 2204 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2205 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2206 const PetscInt *xi = x->i,*yi = y->i; 2207 2208 PetscFunctionBegin; 2209 /* Set the number of nonzeros in the new matrix */ 2210 for(i=0; i<m; i++) { 2211 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2212 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2213 nnz[i] = 0; 2214 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2215 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2216 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2217 nnz[i]++; 2218 } 2219 for (; k<nzy; k++) nnz[i]++; 2220 } 2221 PetscFunctionReturn(0); 2222 } 2223 2224 #undef __FUNCT__ 2225 #define __FUNCT__ "MatAXPY_MPIAIJ" 2226 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2227 { 2228 PetscErrorCode ierr; 2229 PetscInt i; 2230 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2231 PetscBLASInt bnz,one=1; 2232 Mat_SeqAIJ *x,*y; 2233 2234 PetscFunctionBegin; 2235 if (str == SAME_NONZERO_PATTERN) { 2236 PetscScalar alpha = a; 2237 x = (Mat_SeqAIJ *)xx->A->data; 2238 y = (Mat_SeqAIJ *)yy->A->data; 2239 bnz = PetscBLASIntCast(x->nz); 2240 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2241 x = (Mat_SeqAIJ *)xx->B->data; 2242 y = (Mat_SeqAIJ *)yy->B->data; 2243 bnz = PetscBLASIntCast(x->nz); 2244 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2245 } else if (str == SUBSET_NONZERO_PATTERN) { 2246 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2247 2248 x = (Mat_SeqAIJ *)xx->B->data; 2249 y = (Mat_SeqAIJ *)yy->B->data; 2250 if (y->xtoy && y->XtoY != xx->B) { 2251 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2252 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2253 } 2254 if (!y->xtoy) { /* get xtoy */ 2255 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2256 y->XtoY = xx->B; 2257 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2258 } 2259 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2260 } else { 2261 Mat B; 2262 PetscInt *nnz_d,*nnz_o; 2263 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2264 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2265 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2266 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2267 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2268 ierr = 
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2269 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2270 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2271 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2272 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2273 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2274 ierr = MatHeaderReplace(Y,B); 2275 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2276 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2277 } 2278 PetscFunctionReturn(0); 2279 } 2280 2281 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2282 2283 #undef __FUNCT__ 2284 #define __FUNCT__ "MatConjugate_MPIAIJ" 2285 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2286 { 2287 #if defined(PETSC_USE_COMPLEX) 2288 PetscErrorCode ierr; 2289 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2290 2291 PetscFunctionBegin; 2292 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2293 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2294 #else 2295 PetscFunctionBegin; 2296 #endif 2297 PetscFunctionReturn(0); 2298 } 2299 2300 #undef __FUNCT__ 2301 #define __FUNCT__ "MatRealPart_MPIAIJ" 2302 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2303 { 2304 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2305 PetscErrorCode ierr; 2306 2307 PetscFunctionBegin; 2308 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2309 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2310 PetscFunctionReturn(0); 2311 } 2312 2313 #undef __FUNCT__ 2314 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2315 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2316 { 2317 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2318 PetscErrorCode ierr; 2319 2320 PetscFunctionBegin; 2321 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2322 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2323 PetscFunctionReturn(0); 2324 } 2325 2326 #ifdef PETSC_HAVE_PBGL 2327 2328 #include <boost/parallel/mpi/bsp_process_group.hpp> 2329 #include <boost/graph/distributed/ilu_default_graph.hpp> 2330 #include <boost/graph/distributed/ilu_0_block.hpp> 2331 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2332 #include <boost/graph/distributed/petsc/interface.hpp> 2333 #include <boost/multi_array.hpp> 2334 #include <boost/parallel/distributed_property_map->hpp> 2335 2336 #undef __FUNCT__ 2337 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2338 /* 2339 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2340 */ 2341 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2342 { 2343 namespace petsc = boost::distributed::petsc; 2344 2345 namespace graph_dist = boost::graph::distributed; 2346 using boost::graph::distributed::ilu_default::process_group_type; 2347 using boost::graph::ilu_permuted; 2348 2349 PetscBool row_identity, col_identity; 2350 PetscContainer c; 2351 PetscInt m, n, M, N; 2352 PetscErrorCode ierr; 2353 2354 PetscFunctionBegin; 2355 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2356 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2357 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2358 if (!row_identity || !col_identity) { 2359 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2360 } 2361 2362 process_group_type pg; 2363 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2364 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, 
petsc::matrix_distribution(A, pg)); 2365 lgraph_type& level_graph = *lgraph_p; 2366 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2367 2368 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2369 ilu_permuted(level_graph); 2370 2371 /* put together the new matrix */ 2372 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2373 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2374 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2375 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2376 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2377 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2378 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2379 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2380 2381 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2382 ierr = PetscContainerSetPointer(c, lgraph_p); 2383 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2384 ierr = PetscContainerDestroy(&c); 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2390 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2391 { 2392 PetscFunctionBegin; 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #undef __FUNCT__ 2397 #define __FUNCT__ "MatSolve_MPIAIJ" 2398 /* 2399 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2400 */ 2401 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2402 { 2403 namespace graph_dist = boost::graph::distributed; 2404 2405 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2406 lgraph_type* lgraph_p; 2407 PetscContainer c; 2408 PetscErrorCode ierr; 2409 2410 PetscFunctionBegin; 2411 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2412 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2413 ierr = VecCopy(b, x);CHKERRQ(ierr); 2414 2415 PetscScalar* array_x; 2416 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2417 PetscInt sx; 2418 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2419 2420 PetscScalar* array_b; 2421 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2422 PetscInt sb; 2423 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2424 2425 lgraph_type& level_graph = *lgraph_p; 2426 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2427 2428 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2429 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2430 ref_x(array_x, boost::extents[num_vertices(graph)]); 2431 2432 typedef boost::iterator_property_map<array_ref_type::iterator, 2433 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2434 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2435 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2436 2437 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2438 2439 PetscFunctionReturn(0); 2440 } 2441 #endif 2442 2443 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2444 PetscInt nzlocal,nsends,nrecvs; 2445 PetscMPIInt *send_rank,*recv_rank; 2446 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2447 PetscScalar *sbuf_a,**rbuf_a; 2448 PetscErrorCode (*Destroy)(Mat); 2449 } Mat_Redundant; 2450 2451 #undef __FUNCT__ 2452 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2453 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2454 { 2455 PetscErrorCode ierr; 2456 Mat_Redundant 
*redund=(Mat_Redundant*)ptr; 2457 PetscInt i; 2458 2459 PetscFunctionBegin; 2460 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2461 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2462 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2463 for (i=0; i<redund->nrecvs; i++){ 2464 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2465 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2466 } 2467 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2468 ierr = PetscFree(redund);CHKERRQ(ierr); 2469 PetscFunctionReturn(0); 2470 } 2471 2472 #undef __FUNCT__ 2473 #define __FUNCT__ "MatDestroy_MatRedundant" 2474 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2475 { 2476 PetscErrorCode ierr; 2477 PetscContainer container; 2478 Mat_Redundant *redund=PETSC_NULL; 2479 2480 PetscFunctionBegin; 2481 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2482 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2483 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2484 A->ops->destroy = redund->Destroy; 2485 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2486 if (A->ops->destroy) { 2487 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2488 } 2489 PetscFunctionReturn(0); 2490 } 2491 2492 #undef __FUNCT__ 2493 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2494 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2495 { 2496 PetscMPIInt rank,size; 2497 MPI_Comm comm=((PetscObject)mat)->comm; 2498 PetscErrorCode ierr; 2499 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2500 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2501 PetscInt *rowrange=mat->rmap->range; 2502 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2503 Mat A=aij->A,B=aij->B,C=*matredundant; 2504 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2505 PetscScalar *sbuf_a; 2506 PetscInt nzlocal=a->nz+b->nz; 2507 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2508 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2509 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2510 MatScalar *aworkA,*aworkB; 2511 PetscScalar *vals; 2512 PetscMPIInt tag1,tag2,tag3,imdex; 2513 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2514 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2515 MPI_Status recv_status,*send_status; 2516 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2517 PetscInt **rbuf_j=PETSC_NULL; 2518 PetscScalar **rbuf_a=PETSC_NULL; 2519 Mat_Redundant *redund=PETSC_NULL; 2520 PetscContainer container; 2521 2522 PetscFunctionBegin; 2523 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2524 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2525 2526 if (reuse == MAT_REUSE_MATRIX) { 2527 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2528 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2529 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2530 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2531 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2532 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2533 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2534 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2535 2536 nsends = redund->nsends; 2537 nrecvs = redund->nrecvs; 2538 send_rank = redund->send_rank; 2539 recv_rank = redund->recv_rank; 2540 sbuf_nz = redund->sbuf_nz; 2541 rbuf_nz = redund->rbuf_nz; 2542 sbuf_j = redund->sbuf_j; 2543 sbuf_a = redund->sbuf_a; 2544 rbuf_j = redund->rbuf_j; 2545 rbuf_a = redund->rbuf_a; 2546 } 2547 2548 if (reuse == MAT_INITIAL_MATRIX){ 2549 PetscMPIInt subrank,subsize; 2550 PetscInt nleftover,np_subcomm; 2551 /* get the destination processors' id send_rank, nsends and nrecvs */ 2552 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2553 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2554 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2555 np_subcomm = size/nsubcomm; 2556 nleftover = size - nsubcomm*np_subcomm; 2557 nsends = 0; nrecvs = 0; 2558 for (i=0; i<size; i++){ /* i=rank*/ 2559 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2560 send_rank[nsends] = i; nsends++; 2561 recv_rank[nrecvs++] = i; 2562 } 2563 } 2564 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2565 i = size-nleftover-1; 2566 j = 0; 2567 while (j < nsubcomm - nleftover){ 2568 send_rank[nsends++] = i; 2569 i--; j++; 2570 } 2571 } 2572 2573 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2574 for (i=0; i<nleftover; i++){ 2575 recv_rank[nrecvs++] = size-nleftover+i; 2576 } 2577 } 2578 2579 /* allocate sbuf_j, sbuf_a */ 2580 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2581 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2582 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2583 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2584 2585 /* copy mat's local entries into the buffers */ 2586 if (reuse == MAT_INITIAL_MATRIX){ 2587 rownz_max = 0; 2588 rptr = sbuf_j; 2589 cols = sbuf_j + rend-rstart + 1; 2590 vals = sbuf_a; 2591 rptr[0] = 0; 2592 for (i=0; i<rend-rstart; i++){ 2593 row = i + rstart; 2594 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2595 ncols = nzA + nzB; 2596 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2597 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2598 /* load the column indices for this row into cols */ 2599 lwrite = 0; 2600 for (l=0; l<nzB; l++) { 2601 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2602 vals[lwrite] = aworkB[l]; 2603 cols[lwrite++] = ctmp; 2604 } 2605 } 2606 for (l=0; l<nzA; l++){ 2607 vals[lwrite] = aworkA[l]; 2608 cols[lwrite++] = cstart + cworkA[l]; 2609 } 2610 for (l=0; l<nzB; l++) { 2611 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2612 vals[lwrite] = aworkB[l]; 2613 cols[lwrite++] = ctmp; 2614 } 2615 } 2616 vals += ncols; 2617 cols += ncols; 2618 rptr[i+1] = rptr[i] + ncols; 2619 if (rownz_max < ncols) rownz_max = ncols; 2620 } 2621 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2622 } else { /* only copy matrix values into sbuf_a */ 2623 rptr = sbuf_j; 2624 vals = sbuf_a; 2625 rptr[0] = 0; 2626 for (i=0; i<rend-rstart; i++){ 2627 row = i + 
rstart; 2628 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2629 ncols = nzA + nzB; 2630 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2631 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2632 lwrite = 0; 2633 for (l=0; l<nzB; l++) { 2634 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2635 } 2636 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2637 for (l=0; l<nzB; l++) { 2638 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2639 } 2640 vals += ncols; 2641 rptr[i+1] = rptr[i] + ncols; 2642 } 2643 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2644 2645 /* send nzlocal to others, and recv other's nzlocal */ 2646 /*--------------------------------------------------*/ 2647 if (reuse == MAT_INITIAL_MATRIX){ 2648 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2649 s_waits2 = s_waits3 + nsends; 2650 s_waits1 = s_waits2 + nsends; 2651 r_waits1 = s_waits1 + nsends; 2652 r_waits2 = r_waits1 + nrecvs; 2653 r_waits3 = r_waits2 + nrecvs; 2654 } else { 2655 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2656 r_waits3 = s_waits3 + nsends; 2657 } 2658 2659 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2660 if (reuse == MAT_INITIAL_MATRIX){ 2661 /* get new tags to keep the communication clean */ 2662 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2663 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2664 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2665 2666 /* post receives of other's nzlocal */ 2667 for (i=0; i<nrecvs; i++){ 2668 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2669 } 2670 /* send nzlocal to others */ 2671 for (i=0; i<nsends; i++){ 2672 sbuf_nz[i] = nzlocal; 2673 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2674 } 2675 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2676 count = nrecvs; 2677 while (count) { 2678 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2679 recv_rank[imdex] = recv_status.MPI_SOURCE; 2680 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2681 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2682 2683 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2684 rbuf_nz[imdex] += i + 2; 2685 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2686 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2687 count--; 2688 } 2689 /* wait on sends of nzlocal */ 2690 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2691 /* send mat->i,j to others, and recv from other's */ 2692 /*------------------------------------------------*/ 2693 for (i=0; i<nsends; i++){ 2694 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2695 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2696 } 2697 /* wait on receives of mat->i,j */ 2698 /*------------------------------*/ 2699 count = nrecvs; 2700 while (count) { 2701 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2702 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2703 count--; 2704 } 2705 /* wait on sends of mat->i,j */ 2706 /*---------------------------*/ 2707 if (nsends) { 2708 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2709 } 2710 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2711 2712 /* post receives, send and receive mat->a */ 2713 /*----------------------------------------*/ 2714 for (imdex=0; imdex<nrecvs; imdex++) { 2715 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2716 } 2717 for (i=0; i<nsends; i++){ 2718 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2719 } 2720 count = nrecvs; 2721 while (count) { 2722 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2723 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2724 count--; 2725 } 2726 if (nsends) { 2727 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2728 } 2729 2730 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2731 2732 /* create redundant matrix */ 2733 /*-------------------------*/ 2734 if (reuse == MAT_INITIAL_MATRIX){ 2735 /* compute rownz_max for preallocation */ 2736 for (imdex=0; imdex<nrecvs; imdex++){ 2737 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2738 rptr = rbuf_j[imdex]; 2739 for (i=0; i<j; i++){ 2740 ncols = rptr[i+1] - rptr[i]; 2741 if (rownz_max < ncols) rownz_max = ncols; 2742 } 2743 } 2744 2745 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2746 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2747 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2748 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2749 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2750 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2751 } else { 2752 C = *matredundant; 2753 } 2754 2755 /* insert local matrix entries */ 2756 rptr = sbuf_j; 2757 cols = sbuf_j + rend-rstart + 1; 2758 vals = sbuf_a; 2759 for (i=0; i<rend-rstart; i++){ 2760 row = i + rstart; 2761 ncols = rptr[i+1] - rptr[i]; 2762 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2763 vals += ncols; 2764 cols += ncols; 2765 } 2766 /* insert received matrix entries */ 2767 for (imdex=0; imdex<nrecvs; imdex++){ 2768 rstart = rowrange[recv_rank[imdex]]; 2769 rend = rowrange[recv_rank[imdex]+1]; 2770 rptr = rbuf_j[imdex]; 2771 cols = rbuf_j[imdex] + rend-rstart + 1; 2772 vals = rbuf_a[imdex]; 2773 for (i=0; i<rend-rstart; i++){ 2774 row = i + rstart; 2775 ncols = rptr[i+1] - rptr[i]; 2776 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2777 vals += ncols; 2778 cols += ncols; 2779 } 2780 } 2781 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2782 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2783 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2784 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2785 if (reuse == MAT_INITIAL_MATRIX) { 2786 PetscContainer container; 2787 *matredundant = C; 2788 /* create a supporting struct and attach it to C for reuse */ 2789 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2790 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
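/*
   Editor's note (descriptive comment, not part of the original routine): the PetscContainer
   created above is pointed at the Mat_Redundant work buffers below and composed with C under
   the key "Mat_Redundant".  A later call with reuse == MAT_REUSE_MATRIX queries that key back
   from C (see the MAT_REUSE_MATRIX branch near the top of this routine) and so reuses the
   send/receive buffers instead of re-allocating them; PetscContainerSetUserDestroy() ensures
   the buffers are freed together with C.  A minimal caller-side sketch, assuming the argument
   list of the public MatGetRedundantMatrix() mirrors MatGetRedundantMatrix_MPIAIJ() above:

       Mat Cred = PETSC_NULL;
       ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
       ...update the numerical values of mat, keeping the same nonzero pattern...
       ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);
*/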
2791 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2792 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2793 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2794 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2795 2796 redund->nzlocal = nzlocal; 2797 redund->nsends = nsends; 2798 redund->nrecvs = nrecvs; 2799 redund->send_rank = send_rank; 2800 redund->recv_rank = recv_rank; 2801 redund->sbuf_nz = sbuf_nz; 2802 redund->rbuf_nz = rbuf_nz; 2803 redund->sbuf_j = sbuf_j; 2804 redund->sbuf_a = sbuf_a; 2805 redund->rbuf_j = rbuf_j; 2806 redund->rbuf_a = rbuf_a; 2807 2808 redund->Destroy = C->ops->destroy; 2809 C->ops->destroy = MatDestroy_MatRedundant; 2810 } 2811 PetscFunctionReturn(0); 2812 } 2813 2814 #undef __FUNCT__ 2815 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2816 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2817 { 2818 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2819 PetscErrorCode ierr; 2820 PetscInt i,*idxb = 0; 2821 PetscScalar *va,*vb; 2822 Vec vtmp; 2823 2824 PetscFunctionBegin; 2825 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2826 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2827 if (idx) { 2828 for (i=0; i<A->rmap->n; i++) { 2829 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2830 } 2831 } 2832 2833 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2834 if (idx) { 2835 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2836 } 2837 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2838 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2839 2840 for (i=0; i<A->rmap->n; i++){ 2841 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2842 va[i] = vb[i]; 2843 if (idx) idx[i] = a->garray[idxb[i]]; 2844 } 2845 } 2846 2847 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2848 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2849 ierr = PetscFree(idxb);CHKERRQ(ierr); 2850 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2851 PetscFunctionReturn(0); 2852 } 2853 2854 #undef __FUNCT__ 2855 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2856 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2857 { 2858 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2859 PetscErrorCode ierr; 2860 PetscInt i,*idxb = 0; 2861 PetscScalar *va,*vb; 2862 Vec vtmp; 2863 2864 PetscFunctionBegin; 2865 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2866 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2867 if (idx) { 2868 for (i=0; i<A->cmap->n; i++) { 2869 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2870 } 2871 } 2872 2873 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2874 if (idx) { 2875 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2876 } 2877 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2878 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2879 2880 for (i=0; i<A->rmap->n; i++){ 2881 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2882 va[i] = vb[i]; 2883 if (idx) idx[i] = a->garray[idxb[i]]; 2884 } 2885 } 2886 2887 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2888 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2889 ierr = PetscFree(idxb);CHKERRQ(ierr); 2890 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2891 PetscFunctionReturn(0); 2892 } 2893 2894 #undef __FUNCT__ 2895 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2896 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2897 { 2898 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2899 PetscInt n = A->rmap->n; 2900 PetscInt cstart = 
A->cmap->rstart; 2901 PetscInt *cmap = mat->garray; 2902 PetscInt *diagIdx, *offdiagIdx; 2903 Vec diagV, offdiagV; 2904 PetscScalar *a, *diagA, *offdiagA; 2905 PetscInt r; 2906 PetscErrorCode ierr; 2907 2908 PetscFunctionBegin; 2909 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2910 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2911 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2912 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2913 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2914 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2915 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2916 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2917 for(r = 0; r < n; ++r) { 2918 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2919 a[r] = diagA[r]; 2920 idx[r] = cstart + diagIdx[r]; 2921 } else { 2922 a[r] = offdiagA[r]; 2923 idx[r] = cmap[offdiagIdx[r]]; 2924 } 2925 } 2926 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2927 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2928 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2929 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2930 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2931 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2932 PetscFunctionReturn(0); 2933 } 2934 2935 #undef __FUNCT__ 2936 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2937 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2938 { 2939 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2940 PetscInt n = A->rmap->n; 2941 PetscInt cstart = A->cmap->rstart; 2942 PetscInt *cmap = mat->garray; 2943 PetscInt *diagIdx, *offdiagIdx; 2944 Vec diagV, offdiagV; 2945 PetscScalar *a, *diagA, *offdiagA; 2946 PetscInt r; 2947 PetscErrorCode ierr; 2948 2949 PetscFunctionBegin; 2950 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2951 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2952 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2953 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2954 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2955 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2956 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2957 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2958 for(r = 0; r < n; ++r) { 2959 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2960 a[r] = diagA[r]; 2961 idx[r] = cstart + diagIdx[r]; 2962 } else { 2963 a[r] = offdiagA[r]; 2964 idx[r] = cmap[offdiagIdx[r]]; 2965 } 2966 } 2967 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2968 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2969 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2970 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2971 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2972 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2973 PetscFunctionReturn(0); 2974 } 2975 2976 #undef __FUNCT__ 2977 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2978 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2979 { 2980 PetscErrorCode ierr; 2981 Mat *dummy; 2982 2983 PetscFunctionBegin; 2984 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2985 *newmat = *dummy; 2986 ierr = PetscFree(dummy);CHKERRQ(ierr); 2987 PetscFunctionReturn(0); 2988 } 2989 2990 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2991 2992 #undef __FUNCT__ 
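/*
   Editor's note (descriptive comment added for clarity): the point-block diagonal of a MATMPIAIJ
   matrix is contained entirely in the local diagonal block a->A, so the routine below simply
   calls MatInvertBlockDiagonal() on a->A and needs no parallel communication.  A hedged usage
   sketch, following the PetscScalar** signature of the implementation below (the public
   interface may differ between PETSc versions):

       PetscScalar *idiag;
       ierr = MatInvertBlockDiagonal(A,&idiag);CHKERRQ(ierr);
*/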
2993 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2994 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2995 { 2996 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2997 PetscErrorCode ierr; 2998 2999 PetscFunctionBegin; 3000 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3001 PetscFunctionReturn(0); 3002 } 3003 3004 3005 /* -------------------------------------------------------------------*/ 3006 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3007 MatGetRow_MPIAIJ, 3008 MatRestoreRow_MPIAIJ, 3009 MatMult_MPIAIJ, 3010 /* 4*/ MatMultAdd_MPIAIJ, 3011 MatMultTranspose_MPIAIJ, 3012 MatMultTransposeAdd_MPIAIJ, 3013 #ifdef PETSC_HAVE_PBGL 3014 MatSolve_MPIAIJ, 3015 #else 3016 0, 3017 #endif 3018 0, 3019 0, 3020 /*10*/ 0, 3021 0, 3022 0, 3023 MatSOR_MPIAIJ, 3024 MatTranspose_MPIAIJ, 3025 /*15*/ MatGetInfo_MPIAIJ, 3026 MatEqual_MPIAIJ, 3027 MatGetDiagonal_MPIAIJ, 3028 MatDiagonalScale_MPIAIJ, 3029 MatNorm_MPIAIJ, 3030 /*20*/ MatAssemblyBegin_MPIAIJ, 3031 MatAssemblyEnd_MPIAIJ, 3032 MatSetOption_MPIAIJ, 3033 MatZeroEntries_MPIAIJ, 3034 /*24*/ MatZeroRows_MPIAIJ, 3035 0, 3036 #ifdef PETSC_HAVE_PBGL 3037 0, 3038 #else 3039 0, 3040 #endif 3041 0, 3042 0, 3043 /*29*/ MatSetUp_MPIAIJ, 3044 #ifdef PETSC_HAVE_PBGL 3045 0, 3046 #else 3047 0, 3048 #endif 3049 0, 3050 0, 3051 0, 3052 /*34*/ MatDuplicate_MPIAIJ, 3053 0, 3054 0, 3055 0, 3056 0, 3057 /*39*/ MatAXPY_MPIAIJ, 3058 MatGetSubMatrices_MPIAIJ, 3059 MatIncreaseOverlap_MPIAIJ, 3060 MatGetValues_MPIAIJ, 3061 MatCopy_MPIAIJ, 3062 /*44*/ MatGetRowMax_MPIAIJ, 3063 MatScale_MPIAIJ, 3064 0, 3065 0, 3066 MatZeroRowsColumns_MPIAIJ, 3067 /*49*/ 0, 3068 0, 3069 0, 3070 0, 3071 0, 3072 /*54*/ MatFDColoringCreate_MPIAIJ, 3073 0, 3074 MatSetUnfactored_MPIAIJ, 3075 MatPermute_MPIAIJ, 3076 0, 3077 /*59*/ MatGetSubMatrix_MPIAIJ, 3078 MatDestroy_MPIAIJ, 3079 MatView_MPIAIJ, 3080 0, 3081 0, 3082 /*64*/ 0, 3083 0, 3084 0, 3085 0, 3086 0, 3087 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3088 MatGetRowMinAbs_MPIAIJ, 3089 0, 3090 MatSetColoring_MPIAIJ, 3091 #if defined(PETSC_HAVE_ADIC) 3092 MatSetValuesAdic_MPIAIJ, 3093 #else 3094 0, 3095 #endif 3096 MatSetValuesAdifor_MPIAIJ, 3097 /*75*/ MatFDColoringApply_AIJ, 3098 0, 3099 0, 3100 0, 3101 0, 3102 /*80*/ 0, 3103 0, 3104 0, 3105 /*83*/ MatLoad_MPIAIJ, 3106 0, 3107 0, 3108 0, 3109 0, 3110 0, 3111 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3112 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3113 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3114 MatPtAP_Basic, 3115 MatPtAPSymbolic_MPIAIJ, 3116 /*94*/ MatPtAPNumeric_MPIAIJ, 3117 0, 3118 0, 3119 0, 3120 0, 3121 /*99*/ 0, 3122 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3123 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3124 MatConjugate_MPIAIJ, 3125 0, 3126 /*104*/MatSetValuesRow_MPIAIJ, 3127 MatRealPart_MPIAIJ, 3128 MatImaginaryPart_MPIAIJ, 3129 0, 3130 0, 3131 /*109*/0, 3132 MatGetRedundantMatrix_MPIAIJ, 3133 MatGetRowMin_MPIAIJ, 3134 0, 3135 0, 3136 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3137 0, 3138 0, 3139 0, 3140 0, 3141 /*119*/0, 3142 0, 3143 0, 3144 0, 3145 MatGetMultiProcBlock_MPIAIJ, 3146 /*124*/MatFindNonZeroRows_MPIAIJ, 3147 MatGetColumnNorms_MPIAIJ, 3148 MatInvertBlockDiagonal_MPIAIJ, 3149 0, 3150 MatGetSubMatricesParallel_MPIAIJ, 3151 /*129*/0, 3152 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3153 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3154 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3155 0, 3156 /*134*/0, 3157 0, 3158 0, 3159 0, 3160 0 3161 }; 3162 3163 /* ----------------------------------------------------------------------------------------*/ 3164 3165 EXTERN_C_BEGIN 3166 #undef __FUNCT__ 3167 
#define __FUNCT__ "MatStoreValues_MPIAIJ" 3168 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3169 { 3170 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3171 PetscErrorCode ierr; 3172 3173 PetscFunctionBegin; 3174 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3175 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3176 PetscFunctionReturn(0); 3177 } 3178 EXTERN_C_END 3179 3180 EXTERN_C_BEGIN 3181 #undef __FUNCT__ 3182 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3183 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3184 { 3185 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3186 PetscErrorCode ierr; 3187 3188 PetscFunctionBegin; 3189 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3190 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3191 PetscFunctionReturn(0); 3192 } 3193 EXTERN_C_END 3194 3195 EXTERN_C_BEGIN 3196 #undef __FUNCT__ 3197 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3198 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3199 { 3200 Mat_MPIAIJ *b; 3201 PetscErrorCode ierr; 3202 PetscInt i; 3203 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3204 3205 PetscFunctionBegin; 3206 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3207 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3208 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3209 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3210 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3211 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3212 3213 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3214 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3215 if (d_nnz) { 3216 for (i=0; i<B->rmap->n; i++) { 3217 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3218 } 3219 } 3220 if (o_nnz) { 3221 for (i=0; i<B->rmap->n; i++) { 3222 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3223 } 3224 } 3225 b = (Mat_MPIAIJ*)B->data; 3226 3227 if (!B->preallocated) { 3228 /* Explicitly create 2 MATSEQAIJ matrices. */ 3229 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3230 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3231 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3232 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3233 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3234 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3235 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3236 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3237 } 3238 3239 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3240 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3241 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3242 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3243 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3244 B->preallocated = PETSC_TRUE; 3245 PetscFunctionReturn(0); 3246 } 3247 EXTERN_C_END 3248 3249 #undef __FUNCT__ 3250 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3251 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3252 { 3253 Mat mat; 3254 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3255 PetscErrorCode ierr; 3256 3257 PetscFunctionBegin; 3258 *newmat = 0; 3259 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3260 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3261 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3262 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3263 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3264 a = (Mat_MPIAIJ*)mat->data; 3265 3266 mat->factortype = matin->factortype; 3267 mat->rmap->bs = matin->rmap->bs; 3268 mat->cmap->bs = matin->cmap->bs; 3269 mat->assembled = PETSC_TRUE; 3270 mat->insertmode = NOT_SET_VALUES; 3271 mat->preallocated = PETSC_TRUE; 3272 3273 a->size = oldmat->size; 3274 a->rank = oldmat->rank; 3275 a->donotstash = oldmat->donotstash; 3276 a->roworiented = oldmat->roworiented; 3277 a->rowindices = 0; 3278 a->rowvalues = 0; 3279 a->getrowactive = PETSC_FALSE; 3280 3281 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3282 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3283 3284 if (oldmat->colmap) { 3285 #if defined (PETSC_USE_CTABLE) 3286 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3287 #else 3288 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3289 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3290 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3291 #endif 3292 } else a->colmap = 0; 3293 if (oldmat->garray) { 3294 PetscInt len; 3295 len = oldmat->B->cmap->n; 3296 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3297 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3298 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3299 } else a->garray = 0; 3300 3301 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3302 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3303 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3304 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3305 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3306 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3307 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3308 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3309 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3310 *newmat = mat; 3311 PetscFunctionReturn(0); 3312 } 3313 3314 3315 3316 #undef __FUNCT__ 3317 #define __FUNCT__ "MatLoad_MPIAIJ" 3318 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3319 { 3320 PetscScalar *vals,*svals; 3321 MPI_Comm comm = ((PetscObject)viewer)->comm; 3322 PetscErrorCode ierr; 3323 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3324 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3325 PetscInt 
header[4],*rowlengths = 0,M,N,m,*cols; 3326 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3327 PetscInt cend,cstart,n,*rowners,sizesset=1; 3328 int fd; 3329 3330 PetscFunctionBegin; 3331 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3332 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3333 if (!rank) { 3334 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3335 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3336 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3337 } 3338 3339 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3340 3341 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3342 M = header[1]; N = header[2]; 3343 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3344 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3345 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3346 3347 /* If global sizes are set, check if they are consistent with that given in the file */ 3348 if (sizesset) { 3349 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3350 } 3351 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3352 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3353 3354 /* determine ownership of all rows */ 3355 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3356 else m = newMat->rmap->n; /* Set by user */ 3357 3358 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3359 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3360 3361 /* First process needs enough room for process with most rows */ 3362 if (!rank) { 3363 mmax = rowners[1]; 3364 for (i=2; i<size; i++) { 3365 mmax = PetscMax(mmax,rowners[i]); 3366 } 3367 } else mmax = m; 3368 3369 rowners[0] = 0; 3370 for (i=2; i<=size; i++) { 3371 rowners[i] += rowners[i-1]; 3372 } 3373 rstart = rowners[rank]; 3374 rend = rowners[rank+1]; 3375 3376 /* distribute row lengths to all processors */ 3377 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3378 if (!rank) { 3379 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3380 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3381 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3382 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3383 for (j=0; j<m; j++) { 3384 procsnz[0] += ourlens[j]; 3385 } 3386 for (i=1; i<size; i++) { 3387 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3388 /* calculate the number of nonzeros on each processor */ 3389 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3390 procsnz[i] += rowlengths[j]; 3391 } 3392 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3393 } 3394 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3395 } else { 3396 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3397 } 3398 3399 if (!rank) { 3400 /* determine max buffer needed and allocate it */ 3401 maxnz = 0; 3402 for (i=0; i<size; i++) { 3403 maxnz = PetscMax(maxnz,procsnz[i]); 3404 } 3405 ierr = 
PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3406 3407 /* read in my part of the matrix column indices */ 3408 nz = procsnz[0]; 3409 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3410 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3411 3412 /* read in every one elses and ship off */ 3413 for (i=1; i<size; i++) { 3414 nz = procsnz[i]; 3415 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3416 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3417 } 3418 ierr = PetscFree(cols);CHKERRQ(ierr); 3419 } else { 3420 /* determine buffer space needed for message */ 3421 nz = 0; 3422 for (i=0; i<m; i++) { 3423 nz += ourlens[i]; 3424 } 3425 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3426 3427 /* receive message of column indices*/ 3428 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3429 } 3430 3431 /* determine column ownership if matrix is not square */ 3432 if (N != M) { 3433 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3434 else n = newMat->cmap->n; 3435 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3436 cstart = cend - n; 3437 } else { 3438 cstart = rstart; 3439 cend = rend; 3440 n = cend - cstart; 3441 } 3442 3443 /* loop over local rows, determining number of off diagonal entries */ 3444 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3445 jj = 0; 3446 for (i=0; i<m; i++) { 3447 for (j=0; j<ourlens[i]; j++) { 3448 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3449 jj++; 3450 } 3451 } 3452 3453 for (i=0; i<m; i++) { 3454 ourlens[i] -= offlens[i]; 3455 } 3456 if (!sizesset) { 3457 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3458 } 3459 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3460 3461 for (i=0; i<m; i++) { 3462 ourlens[i] += offlens[i]; 3463 } 3464 3465 if (!rank) { 3466 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3467 3468 /* read in my part of the matrix numerical values */ 3469 nz = procsnz[0]; 3470 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3471 3472 /* insert into matrix */ 3473 jj = rstart; 3474 smycols = mycols; 3475 svals = vals; 3476 for (i=0; i<m; i++) { 3477 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3478 smycols += ourlens[i]; 3479 svals += ourlens[i]; 3480 jj++; 3481 } 3482 3483 /* read in other processors and ship out */ 3484 for (i=1; i<size; i++) { 3485 nz = procsnz[i]; 3486 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3487 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3488 } 3489 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3490 } else { 3491 /* receive numeric values */ 3492 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3493 3494 /* receive message of values*/ 3495 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3496 3497 /* insert into matrix */ 3498 jj = rstart; 3499 smycols = mycols; 3500 svals = vals; 3501 for (i=0; i<m; i++) { 3502 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3503 smycols += ourlens[i]; 3504 svals += ourlens[i]; 3505 jj++; 3506 } 3507 } 3508 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3509 ierr = PetscFree(vals);CHKERRQ(ierr); 3510 ierr = PetscFree(mycols);CHKERRQ(ierr); 3511 ierr = PetscFree(rowners);CHKERRQ(ierr); 3512 ierr = 
MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3513 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3514 PetscFunctionReturn(0); 3515 } 3516 3517 #undef __FUNCT__ 3518 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3519 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3520 { 3521 PetscErrorCode ierr; 3522 IS iscol_local; 3523 PetscInt csize; 3524 3525 PetscFunctionBegin; 3526 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3527 if (call == MAT_REUSE_MATRIX) { 3528 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3529 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3530 } else { 3531 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3532 } 3533 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3534 if (call == MAT_INITIAL_MATRIX) { 3535 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3536 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3537 } 3538 PetscFunctionReturn(0); 3539 } 3540 3541 #undef __FUNCT__ 3542 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3543 /* 3544 Not great since it makes two copies of the submatrix, first an SeqAIJ 3545 in local and then by concatenating the local matrices the end result. 3546 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3547 3548 Note: This requires a sequential iscol with all indices. 3549 */ 3550 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3551 { 3552 PetscErrorCode ierr; 3553 PetscMPIInt rank,size; 3554 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3555 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3556 Mat *local,M,Mreuse; 3557 MatScalar *vwork,*aa; 3558 MPI_Comm comm = ((PetscObject)mat)->comm; 3559 Mat_SeqAIJ *aij; 3560 3561 3562 PetscFunctionBegin; 3563 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3564 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3565 3566 if (call == MAT_REUSE_MATRIX) { 3567 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3568 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3569 local = &Mreuse; 3570 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3571 } else { 3572 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3573 Mreuse = *local; 3574 ierr = PetscFree(local);CHKERRQ(ierr); 3575 } 3576 3577 /* 3578 m - number of local rows 3579 n - number of columns (same on all processors) 3580 rstart - first row in new global matrix generated 3581 */ 3582 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3583 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3584 if (call == MAT_INITIAL_MATRIX) { 3585 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3586 ii = aij->i; 3587 jj = aij->j; 3588 3589 /* 3590 Determine the number of non-zeros in the diagonal and off-diagonal 3591 portions of the matrix in order to do correct preallocation 3592 */ 3593 3594 /* first get start and end of "diagonal" columns */ 3595 if (csize == PETSC_DECIDE) { 3596 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3597 if (mglobal == n) { /* square matrix */ 3598 nlocal = m; 3599 } else { 3600 nlocal = n/size + ((n % size) > rank); 3601 } 3602 } else { 3603 nlocal = 
csize; 3604 } 3605 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3606 rstart = rend - nlocal; 3607 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3608 3609 /* next, compute all the lengths */ 3610 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3611 olens = dlens + m; 3612 for (i=0; i<m; i++) { 3613 jend = ii[i+1] - ii[i]; 3614 olen = 0; 3615 dlen = 0; 3616 for (j=0; j<jend; j++) { 3617 if (*jj < rstart || *jj >= rend) olen++; 3618 else dlen++; 3619 jj++; 3620 } 3621 olens[i] = olen; 3622 dlens[i] = dlen; 3623 } 3624 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3625 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3626 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3627 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3628 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3629 ierr = PetscFree(dlens);CHKERRQ(ierr); 3630 } else { 3631 PetscInt ml,nl; 3632 3633 M = *newmat; 3634 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3635 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3636 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3637 /* 3638 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3639 rather than the slower MatSetValues(). 3640 */ 3641 M->was_assembled = PETSC_TRUE; 3642 M->assembled = PETSC_FALSE; 3643 } 3644 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3645 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3646 ii = aij->i; 3647 jj = aij->j; 3648 aa = aij->a; 3649 for (i=0; i<m; i++) { 3650 row = rstart + i; 3651 nz = ii[i+1] - ii[i]; 3652 cwork = jj; jj += nz; 3653 vwork = aa; aa += nz; 3654 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3655 } 3656 3657 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3658 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3659 *newmat = M; 3660 3661 /* save submatrix used in processor for next request */ 3662 if (call == MAT_INITIAL_MATRIX) { 3663 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3664 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3665 } 3666 3667 PetscFunctionReturn(0); 3668 } 3669 3670 EXTERN_C_BEGIN 3671 #undef __FUNCT__ 3672 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3673 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3674 { 3675 PetscInt m,cstart, cend,j,nnz,i,d; 3676 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3677 const PetscInt *JJ; 3678 PetscScalar *values; 3679 PetscErrorCode ierr; 3680 3681 PetscFunctionBegin; 3682 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3683 3684 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3685 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3686 m = B->rmap->n; 3687 cstart = B->cmap->rstart; 3688 cend = B->cmap->rend; 3689 rstart = B->rmap->rstart; 3690 3691 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3692 3693 #if defined(PETSC_USE_DEBUGGING) 3694 for (i=0; i<m; i++) { 3695 nnz = Ii[i+1]- Ii[i]; 3696 JJ = J + Ii[i]; 3697 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3698 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3699 
if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3700 }
3701 #endif
3702
3703 for (i=0; i<m; i++) {
3704 nnz = Ii[i+1]- Ii[i];
3705 JJ = J + Ii[i];
3706 nnz_max = PetscMax(nnz_max,nnz);
3707 d = 0;
3708 for (j=0; j<nnz; j++) {
3709 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3710 }
3711 d_nnz[i] = d;
3712 o_nnz[i] = nnz - d;
3713 }
3714 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3715 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3716
3717 if (v) values = (PetscScalar*)v;
3718 else {
3719 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3720 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3721 }
3722
3723 for (i=0; i<m; i++) {
3724 ii = i + rstart;
3725 nnz = Ii[i+1]- Ii[i];
3726 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3727 }
3728 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3729 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3730
3731 if (!v) {
3732 ierr = PetscFree(values);CHKERRQ(ierr);
3733 }
3734 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3735 PetscFunctionReturn(0);
3736 }
3737 EXTERN_C_END
3738
3739 #undef __FUNCT__
3740 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3741 /*@
3742 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3743 (the default parallel PETSc format).
3744
3745 Collective on MPI_Comm
3746
3747 Input Parameters:
3748 + B - the matrix
3749 . i - the indices into j for the start of each local row (starts with zero)
3750 . j - the column indices for each local row (starts with zero)
3751 - v - optional values in the matrix
3752
3753 Level: developer
3754
3755 Notes:
3756 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3757 thus you CANNOT change the matrix entries by changing the values of v[] after you have
3758 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3759
3760 The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3761
3762 The format used for the sparse matrix input is equivalent to a
3763 row-major ordering, 
i.e. for the following matrix, the input data expected is
3764 as shown:
3765
3766 1 0 0
3767 2 0 3 P0
3768 -------
3769 4 5 6 P1
3770
3771 Process0 [P0]: rows_owned=[0,1]
3772 i = {0,1,3} [size = nrow+1 = 2+1]
3773 j = {0,0,2} [size = nz = 3]
3774 v = {1,2,3} [size = nz = 3]
3775
3776 Process1 [P1]: rows_owned=[2]
3777 i = {0,3} [size = nrow+1 = 1+1]
3778 j = {0,1,2} [size = nz = 3]
3779 v = {4,5,6} [size = nz = 3]
3780
3781 .keywords: matrix, aij, compressed row, sparse, parallel
3782
3783 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3784 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3785 @*/
3786 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3787 {
3788 PetscErrorCode ierr;
3789
3790 PetscFunctionBegin;
3791 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3792 PetscFunctionReturn(0);
3793 }
3794
3795 #undef __FUNCT__
3796 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3797 /*@C
3798 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3799 (the default parallel PETSc format). For good matrix assembly performance
3800 the user should preallocate the matrix storage by setting the parameters
3801 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3802 performance can be increased by more than a factor of 50.
3803
3804 Collective on MPI_Comm
3805
3806 Input Parameters:
3807 + A - the matrix
3808 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3809 (same value is used for all local rows)
3810 . d_nnz - array containing the number of nonzeros in the various rows of the
3811 DIAGONAL portion of the local submatrix (possibly different for each row)
3812 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3813 The size of this array is equal to the number of local rows, i.e. 'm'.
3814 For matrices that will be factored, you must leave room for (and set)
3815 the diagonal entry even if it is zero.
3816 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3817 submatrix (same value is used for all local rows).
3818 - o_nnz - array containing the number of nonzeros in the various rows of the
3819 OFF-DIAGONAL portion of the local submatrix (possibly different for
3820 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3821 structure. The size of this array is equal to the number
3822 of local rows, i.e. 'm'.
3823
3824 If the *_nnz parameter is given then the *_nz parameter is ignored.
3825
3826 The AIJ format (also called the Yale sparse matrix format or
3827 compressed row storage (CSR)) is fully compatible with standard Fortran 77
3828 storage. The stored row and column indices begin with zero.
3829 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3830
3831 The parallel matrix is partitioned such that the first m0 rows belong to
3832 process 0, the next m1 rows belong to process 1, the next m2 rows belong
3833 to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
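   A minimal call sequence for this preallocation, shown only as a sketch (comm, the local
   size m, and the counts 5 and 2 below are illustrative assumptions, not values required by
   this routine):
.vb
      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(A,MATMPIAIJ);
      MatSetFromOptions(A);
      /* guess at most 5 nonzeros per row in the diagonal block and 2 in the off-diagonal block */
      MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
      /* ... then MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve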
3834
3835 The DIAGONAL portion of the local submatrix of a processor can be defined
3836 as the submatrix which is obtained by extracting the part corresponding to
3837 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3838 first row that belongs to the processor, r2 is the last row belonging to
3839 this processor, and c1-c2 is the range of indices of the local part of a
3840 vector suitable for applying the matrix to. This is an mxn matrix. In the
3841 common case of a square matrix, the row and column ranges are the same and
3842 the DIAGONAL part is also square. The remaining portion of the local
3843 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3844
3845 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3846
3847 You can call MatGetInfo() to get information on how effective the preallocation was;
3848 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded.
3849 You can also run with the option -info and look for messages with the string
3850 malloc in them to see if additional memory allocation was needed.
3851
3852 Example usage:
3853
3854 Consider the following 8x8 matrix with 34 non-zero values, that is
3855 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3856 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3857 as follows:
3858
3859 .vb
3860 1 2 0 | 0 3 0 | 0 4
3861 Proc0 0 5 6 | 7 0 0 | 8 0
3862 9 0 10 | 11 0 0 | 12 0
3863 -------------------------------------
3864 13 0 14 | 15 16 17 | 0 0
3865 Proc1 0 18 0 | 19 20 21 | 0 0
3866 0 0 0 | 22 23 0 | 24 0
3867 -------------------------------------
3868 Proc2 25 26 27 | 0 0 28 | 29 0
3869 30 0 0 | 31 32 33 | 0 34
3870 .ve
3871
3872 This can be represented as a collection of submatrices as:
3873
3874 .vb
3875 A B C
3876 D E F
3877 G H I
3878 .ve
3879
3880 Where the submatrices A,B,C are owned by proc0, D,E,F are
3881 owned by proc1, G,H,I are owned by proc2.
3882
3883 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3884 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3885 The 'M','N' parameters are 8,8, and have the same values on all procs.
3886
3887 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3888 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3889 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3890 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3891 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3892 matrix, and [DF] as another SeqAIJ matrix.
3893
3894 When d_nz, o_nz parameters are specified, d_nz storage elements are
3895 allocated for every row of the local diagonal submatrix, and o_nz
3896 storage locations are allocated for every row of the OFF-DIAGONAL submat.
3897 One way to choose d_nz and o_nz is to use the max nonzeros per local
3898 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3899 In this case, the values of d_nz,o_nz are:
3900 .vb
3901 proc0 : d_nz = 2, o_nz = 2
3902 proc1 : d_nz = 3, o_nz = 2
3903 proc2 : d_nz = 1, o_nz = 4
3904 .ve
3905 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3906 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3907 for proc2. i.e. we are using 12+15+10=37 storage locations to store
3908 34 values.
3909
3910 When d_nnz, o_nnz parameters are specified, the storage is specified
3911 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3912 In the above case the values for d_nnz,o_nnz are: 3913 .vb 3914 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3915 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3916 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3917 .ve 3918 Here the space allocated is sum of all the above values i.e 34, and 3919 hence pre-allocation is perfect. 3920 3921 Level: intermediate 3922 3923 .keywords: matrix, aij, compressed row, sparse, parallel 3924 3925 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3926 MPIAIJ, MatGetInfo() 3927 @*/ 3928 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3929 { 3930 PetscErrorCode ierr; 3931 3932 PetscFunctionBegin; 3933 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3934 PetscValidType(B,1); 3935 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3936 PetscFunctionReturn(0); 3937 } 3938 3939 #undef __FUNCT__ 3940 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3941 /*@ 3942 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3943 CSR format the local rows. 3944 3945 Collective on MPI_Comm 3946 3947 Input Parameters: 3948 + comm - MPI communicator 3949 . m - number of local rows (Cannot be PETSC_DECIDE) 3950 . n - This value should be the same as the local size used in creating the 3951 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3952 calculated if N is given) For square matrices n is almost always m. 3953 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3954 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3955 . i - row indices 3956 . j - column indices 3957 - a - matrix values 3958 3959 Output Parameter: 3960 . mat - the matrix 3961 3962 Level: intermediate 3963 3964 Notes: 3965 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3966 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3967 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3968 3969 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3970 3971 The format which is used for the sparse matrix input, is equivalent to a 3972 row-major ordering.. 
i.e for the following matrix, the input data expected is 3973 as shown: 3974 3975 1 0 0 3976 2 0 3 P0 3977 ------- 3978 4 5 6 P1 3979 3980 Process0 [P0]: rows_owned=[0,1] 3981 i = {0,1,3} [size = nrow+1 = 2+1] 3982 j = {0,0,2} [size = nz = 6] 3983 v = {1,2,3} [size = nz = 6] 3984 3985 Process1 [P1]: rows_owned=[2] 3986 i = {0,3} [size = nrow+1 = 1+1] 3987 j = {0,1,2} [size = nz = 6] 3988 v = {4,5,6} [size = nz = 6] 3989 3990 .keywords: matrix, aij, compressed row, sparse, parallel 3991 3992 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3993 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3994 @*/ 3995 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3996 { 3997 PetscErrorCode ierr; 3998 3999 PetscFunctionBegin; 4000 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4001 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4002 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4003 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4004 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 4005 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4006 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4007 PetscFunctionReturn(0); 4008 } 4009 4010 #undef __FUNCT__ 4011 #define __FUNCT__ "MatCreateAIJ" 4012 /*@C 4013 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4014 (the default parallel PETSc format). For good matrix assembly performance 4015 the user should preallocate the matrix storage by setting the parameters 4016 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4017 performance can be increased by more than a factor of 50. 4018 4019 Collective on MPI_Comm 4020 4021 Input Parameters: 4022 + comm - MPI communicator 4023 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4024 This value should be the same as the local size used in creating the 4025 y vector for the matrix-vector product y = Ax. 4026 . n - This value should be the same as the local size used in creating the 4027 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4028 calculated if N is given) For square matrices n is almost always m. 4029 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4030 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4031 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4032 (same value is used for all local rows) 4033 . d_nnz - array containing the number of nonzeros in the various rows of the 4034 DIAGONAL portion of the local submatrix (possibly different for each row) 4035 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4036 The size of this array is equal to the number of local rows, i.e 'm'. 4037 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4038 submatrix (same value is used for all local rows). 4039 - o_nnz - array containing the number of nonzeros in the various rows of the 4040 OFF-DIAGONAL portion of the local submatrix (possibly different for 4041 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4042 structure. The size of this array is equal to the number 4043 of local rows, i.e 'm'. 4044 4045 Output Parameter: 4046 . 
A - the matrix 4047 4048 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4049 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4050 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4051 4052 Notes: 4053 If the *_nnz parameter is given then the *_nz parameter is ignored 4054 4055 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4056 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4057 storage requirements for this matrix. 4058 4059 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4060 processor than it must be used on all processors that share the object for 4061 that argument. 4062 4063 The user MUST specify either the local or global matrix dimensions 4064 (possibly both). 4065 4066 The parallel matrix is partitioned across processors such that the 4067 first m0 rows belong to process 0, the next m1 rows belong to 4068 process 1, the next m2 rows belong to process 2 etc.. where 4069 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4070 values corresponding to [m x N] submatrix. 4071 4072 The columns are logically partitioned with the n0 columns belonging 4073 to 0th partition, the next n1 columns belonging to the next 4074 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4075 4076 The DIAGONAL portion of the local submatrix on any given processor 4077 is the submatrix corresponding to the rows and columns m,n 4078 corresponding to the given processor. i.e diagonal matrix on 4079 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4080 etc. The remaining portion of the local submatrix [m x (N-n)] 4081 constitute the OFF-DIAGONAL portion. The example below better 4082 illustrates this concept. 4083 4084 For a square global matrix we define each processor's diagonal portion 4085 to be its local rows and the corresponding columns (a square submatrix); 4086 each processor's off-diagonal portion encompasses the remainder of the 4087 local matrix (a rectangular submatrix). 4088 4089 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4090 4091 When calling this routine with a single process communicator, a matrix of 4092 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4093 type of communicator, use the construction mechanism: 4094 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4095 4096 By default, this format uses inodes (identical nodes) when possible. 4097 We search for consecutive rows with the same nonzero structure, thereby 4098 reusing matrix information to achieve increased efficiency. 4099 4100 Options Database Keys: 4101 + -mat_no_inode - Do not use inodes 4102 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4103 - -mat_aij_oneindex - Internally use indexing starting at 1 4104 rather than 0. Note that when calling MatSetValues(), 4105 the user still MUST index entries starting at 0! 4106 4107 4108 Example usage: 4109 4110 Consider the following 8x8 matrix with 34 non-zero values, that is 4111 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4112 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4113 as follows: 4114 4115 .vb 4116 1 2 0 | 0 3 0 | 0 4 4117 Proc0 0 5 6 | 7 0 0 | 8 0 4118 9 0 10 | 11 0 0 | 12 0 4119 ------------------------------------- 4120 13 0 14 | 15 16 17 | 0 0 4121 Proc1 0 18 0 | 19 20 21 | 0 0 4122 0 0 0 | 22 23 0 | 24 0 4123 ------------------------------------- 4124 Proc2 25 26 27 | 0 0 28 | 29 0 4125 30 0 0 | 31 32 33 | 0 34 4126 .ve 4127 4128 This can be represented as a collection of submatrices as: 4129 4130 .vb 4131 A B C 4132 D E F 4133 G H I 4134 .ve 4135 4136 Where the submatrices A,B,C are owned by proc0, D,E,F are 4137 owned by proc1, G,H,I are owned by proc2. 4138 4139 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4140 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4141 The 'M','N' parameters are 8,8, and have the same values on all procs. 4142 4143 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4144 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4145 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4146 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4147 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4148 matrix, ans [DF] as another SeqAIJ matrix. 4149 4150 When d_nz, o_nz parameters are specified, d_nz storage elements are 4151 allocated for every row of the local diagonal submatrix, and o_nz 4152 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4153 One way to choose d_nz and o_nz is to use the max nonzerors per local 4154 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4155 In this case, the values of d_nz,o_nz are: 4156 .vb 4157 proc0 : dnz = 2, o_nz = 2 4158 proc1 : dnz = 3, o_nz = 2 4159 proc2 : dnz = 1, o_nz = 4 4160 .ve 4161 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4162 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4163 for proc3. i.e we are using 12+15+10=37 storage locations to store 4164 34 values. 4165 4166 When d_nnz, o_nnz parameters are specified, the storage is specified 4167 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4168 In the above case the values for d_nnz,o_nnz are: 4169 .vb 4170 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4171 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4172 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4173 .ve 4174 Here the space allocated is sum of all the above values i.e 34, and 4175 hence pre-allocation is perfect. 
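   As a sketch only (this call is not part of the example above, and comm stands for the
   communicator the matrix lives on), proc1 could create its share of the 8x8 matrix by
   passing the per-row counts listed above directly to this routine:
.vb
      PetscInt d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
      Mat      A;
      MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   Each process in comm must make the same call with its own local sizes and counts.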
4176 4177 Level: intermediate 4178 4179 .keywords: matrix, aij, compressed row, sparse, parallel 4180 4181 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4182 MPIAIJ, MatCreateMPIAIJWithArrays() 4183 @*/ 4184 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4185 { 4186 PetscErrorCode ierr; 4187 PetscMPIInt size; 4188 4189 PetscFunctionBegin; 4190 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4191 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4192 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4193 if (size > 1) { 4194 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4195 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4196 } else { 4197 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4198 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4199 } 4200 PetscFunctionReturn(0); 4201 } 4202 4203 #undef __FUNCT__ 4204 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4205 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4206 { 4207 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4208 4209 PetscFunctionBegin; 4210 *Ad = a->A; 4211 *Ao = a->B; 4212 *colmap = a->garray; 4213 PetscFunctionReturn(0); 4214 } 4215 4216 #undef __FUNCT__ 4217 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4218 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4219 { 4220 PetscErrorCode ierr; 4221 PetscInt i; 4222 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4223 4224 PetscFunctionBegin; 4225 if (coloring->ctype == IS_COLORING_GLOBAL) { 4226 ISColoringValue *allcolors,*colors; 4227 ISColoring ocoloring; 4228 4229 /* set coloring for diagonal portion */ 4230 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4231 4232 /* set coloring for off-diagonal portion */ 4233 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4234 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4235 for (i=0; i<a->B->cmap->n; i++) { 4236 colors[i] = allcolors[a->garray[i]]; 4237 } 4238 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4239 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4240 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4241 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4242 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4243 ISColoringValue *colors; 4244 PetscInt *larray; 4245 ISColoring ocoloring; 4246 4247 /* set coloring for diagonal portion */ 4248 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4249 for (i=0; i<a->A->cmap->n; i++) { 4250 larray[i] = i + A->cmap->rstart; 4251 } 4252 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4253 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4254 for (i=0; i<a->A->cmap->n; i++) { 4255 colors[i] = coloring->colors[larray[i]]; 4256 } 4257 ierr = PetscFree(larray);CHKERRQ(ierr); 4258 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4259 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4260 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4261 4262 /* set coloring for off-diagonal portion */ 4263 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4264 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4265 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4266 for (i=0; i<a->B->cmap->n; i++) { 4267 colors[i] = coloring->colors[larray[i]]; 4268 } 4269 ierr = PetscFree(larray);CHKERRQ(ierr); 4270 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4271 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4272 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4273 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4274 4275 PetscFunctionReturn(0); 4276 } 4277 4278 #if defined(PETSC_HAVE_ADIC) 4279 #undef __FUNCT__ 4280 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4281 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4282 { 4283 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4284 PetscErrorCode ierr; 4285 4286 PetscFunctionBegin; 4287 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4288 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4289 PetscFunctionReturn(0); 4290 } 4291 #endif 4292 4293 #undef __FUNCT__ 4294 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4295 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4296 { 4297 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4298 PetscErrorCode ierr; 4299 4300 PetscFunctionBegin; 4301 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4302 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4303 PetscFunctionReturn(0); 4304 } 4305 4306 #undef __FUNCT__ 4307 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4308 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4309 { 4310 PetscErrorCode ierr; 4311 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4312 PetscInt *indx; 4313 4314 PetscFunctionBegin; 4315 /* This routine will ONLY return MPIAIJ type matrix */ 4316 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4317 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4318 if (n == PETSC_DECIDE){ 4319 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4320 } 4321 /* Check sum(n) = N */ 4322 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4323 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4324 4325 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4326 rstart -= m; 4327 4328 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4329 for (i=0;i<m;i++) { 4330 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4331 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4332 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4333 } 4334 4335 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4336 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4337 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4338 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4339 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4340 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4341 PetscFunctionReturn(0); 4342 } 4343 4344 #undef __FUNCT__ 4345 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4346 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4347 { 4348 PetscErrorCode ierr; 4349 PetscInt 
m,N,i,rstart,nnz,Ii; 4350 PetscInt *indx; 4351 PetscScalar *values; 4352 4353 PetscFunctionBegin; 4354 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4355 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4356 for (i=0;i<m;i++) { 4357 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4358 Ii = i + rstart; 4359 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4360 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4361 } 4362 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4363 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4364 PetscFunctionReturn(0); 4365 } 4366 4367 #undef __FUNCT__ 4368 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4369 /*@ 4370 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4371 matrices from each processor 4372 4373 Collective on MPI_Comm 4374 4375 Input Parameters: 4376 + comm - the communicators the parallel matrix will live on 4377 . inmat - the input sequential matrices 4378 . n - number of local columns (or PETSC_DECIDE) 4379 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4380 4381 Output Parameter: 4382 . outmat - the parallel matrix generated 4383 4384 Level: advanced 4385 4386 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4387 4388 @*/ 4389 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4390 { 4391 PetscErrorCode ierr; 4392 4393 PetscFunctionBegin; 4394 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4395 if (scall == MAT_INITIAL_MATRIX){ 4396 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4397 } 4398 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4399 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4400 PetscFunctionReturn(0); 4401 } 4402 4403 #undef __FUNCT__ 4404 #define __FUNCT__ "MatFileSplit" 4405 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4406 { 4407 PetscErrorCode ierr; 4408 PetscMPIInt rank; 4409 PetscInt m,N,i,rstart,nnz; 4410 size_t len; 4411 const PetscInt *indx; 4412 PetscViewer out; 4413 char *name; 4414 Mat B; 4415 const PetscScalar *values; 4416 4417 PetscFunctionBegin; 4418 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4419 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4420 /* Should this be the type of the diagonal block of A? 
*/ 4421 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4422 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4423 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4424 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4425 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4426 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4427 for (i=0;i<m;i++) { 4428 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4429 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4430 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4431 } 4432 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4433 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4434 4435 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4436 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4437 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4438 sprintf(name,"%s.%d",outfile,rank); 4439 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4440 ierr = PetscFree(name); 4441 ierr = MatView(B,out);CHKERRQ(ierr); 4442 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4443 ierr = MatDestroy(&B);CHKERRQ(ierr); 4444 PetscFunctionReturn(0); 4445 } 4446 4447 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4448 #undef __FUNCT__ 4449 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4450 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4451 { 4452 PetscErrorCode ierr; 4453 Mat_Merge_SeqsToMPI *merge; 4454 PetscContainer container; 4455 4456 PetscFunctionBegin; 4457 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4458 if (container) { 4459 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4460 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4461 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4462 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4463 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4464 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4465 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4466 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4467 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4468 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4469 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4470 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4471 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4472 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4473 ierr = PetscFree(merge);CHKERRQ(ierr); 4474 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4475 } 4476 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4477 PetscFunctionReturn(0); 4478 } 4479 4480 #include <../src/mat/utils/freespace.h> 4481 #include <petscbt.h> 4482 4483 #undef __FUNCT__ 4484 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4485 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4486 { 4487 PetscErrorCode ierr; 4488 MPI_Comm comm=((PetscObject)mpimat)->comm; 4489 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4490 PetscMPIInt size,rank,taga,*len_s; 4491 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4492 PetscInt proc,m; 4493 PetscInt **buf_ri,**buf_rj; 4494 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4495 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4496 MPI_Request *s_waits,*r_waits; 4497 MPI_Status *status; 4498 MatScalar *aa=a->a; 4499 MatScalar **abuf_r,*ba_i; 4500 Mat_Merge_SeqsToMPI *merge; 4501 PetscContainer container; 4502 4503 PetscFunctionBegin; 4504 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4505 4506 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4507 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4508 4509 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4510 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4511 4512 bi = merge->bi; 4513 bj = merge->bj; 4514 buf_ri = merge->buf_ri; 4515 buf_rj = merge->buf_rj; 4516 4517 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4518 owners = merge->rowmap->range; 4519 len_s = merge->len_s; 4520 4521 /* send and recv matrix values */ 4522 /*-----------------------------*/ 4523 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4524 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4525 4526 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4527 for (proc=0,k=0; proc<size; proc++){ 4528 if (!len_s[proc]) continue; 4529 i = owners[proc]; 4530 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4531 k++; 4532 } 4533 4534 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4535 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4536 ierr = PetscFree(status);CHKERRQ(ierr); 4537 4538 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4539 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4540 4541 /* insert mat values of mpimat */ 4542 /*----------------------------*/ 4543 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4544 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4545 4546 for (k=0; k<merge->nrecv; k++){ 4547 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4548 nrows = *(buf_ri_k[k]); 4549 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4550 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4551 } 4552 4553 /* set values of ba */ 4554 m = merge->rowmap->n; 4555 for (i=0; i<m; i++) { 4556 arow = owners[rank] + i; 4557 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4558 bnzi = bi[i+1] - bi[i]; 4559 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4560 4561 /* add local non-zero vals of this proc's seqmat into ba */ 4562 anzi = ai[arow+1] - ai[arow]; 4563 aj = a->j + ai[arow]; 4564 aa = a->a + ai[arow]; 4565 nextaj = 0; 4566 for (j=0; nextaj<anzi; j++){ 4567 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4568 ba_i[j] += aa[nextaj++]; 4569 } 4570 } 4571 4572 /* add received vals into ba */ 4573 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4574 /* i-th row */ 4575 if (i == *nextrow[k]) { 4576 anzi = *(nextai[k]+1) - *nextai[k]; 4577 aj = buf_rj[k] + *(nextai[k]); 4578 aa = abuf_r[k] + *(nextai[k]); 4579 nextaj = 0; 4580 for (j=0; nextaj<anzi; j++){ 4581 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4582 ba_i[j] += aa[nextaj++]; 4583 } 4584 } 4585 nextrow[k]++; nextai[k]++; 4586 } 4587 } 4588 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4589 } 4590 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4591 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4592 4593 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4594 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4595 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4596 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4597 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4598 PetscFunctionReturn(0); 4599 } 4600 4601 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4602 4603 #undef __FUNCT__ 4604 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4605 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4606 { 4607 PetscErrorCode ierr; 4608 Mat B_mpi; 4609 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4610 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4611 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4612 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4613 PetscInt len,proc,*dnz,*onz,bs,cbs; 4614 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4615 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4616 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4617 MPI_Status *status; 4618 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4619 PetscBT lnkbt; 4620 Mat_Merge_SeqsToMPI *merge; 4621 PetscContainer container; 4622 4623 PetscFunctionBegin; 4624 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4625 4626 /* make sure it is a PETSc comm */ 4627 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4628 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4629 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4630 4631 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4632 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4633 4634 /* determine row ownership */ 4635 /*---------------------------------------------------------*/ 4636 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4637 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4638 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4639 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4640 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4641 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4642 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4643 4644 m = merge->rowmap->n; 4645 M = merge->rowmap->N; 4646 owners = merge->rowmap->range; 4647 4648 /* determine the number of messages to send, their lengths */ 4649 /*---------------------------------------------------------*/ 4650 len_s = merge->len_s; 4651 4652 len = 0; /* length of buf_si[] */ 4653 merge->nsend = 0; 4654 for (proc=0; proc<size; proc++){ 4655 len_si[proc] = 0; 4656 if (proc == rank){ 4657 len_s[proc] = 0; 4658 } else { 4659 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4660 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4661 } 4662 if (len_s[proc]) { 4663 merge->nsend++; 4664 nrows = 0; 4665 for (i=owners[proc]; i<owners[proc+1]; i++){ 4666 if (ai[i+1] > ai[i]) nrows++; 4667 } 4668 len_si[proc] = 2*(nrows+1); 4669 len += len_si[proc]; 4670 } 4671 } 4672 4673 /* determine the number and length of messages to receive for ij-structure */ 4674 /*-------------------------------------------------------------------------*/ 4675 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4676 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4677 4678 /* post the Irecv of j-structure */ 4679 /*-------------------------------*/ 4680 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4681 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4682 4683 /* post the Isend of j-structure */ 4684 /*--------------------------------*/ 4685 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4686 4687 for (proc=0, k=0; proc<size; proc++){ 4688 if (!len_s[proc]) continue; 4689 i = owners[proc]; 4690 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4691 k++; 4692 } 4693 4694 /* receives and sends of j-structure are complete */ 4695 /*------------------------------------------------*/ 4696 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4697 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4698 4699 /* send and recv i-structure */ 4700 /*---------------------------*/ 4701 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4702 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4703 4704 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4705 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4706 for (proc=0,k=0; proc<size; proc++){ 4707 if (!len_s[proc]) continue; 4708 /* form outgoing message for i-structure: 4709 buf_si[0]: nrows to be sent 4710 [1:nrows]: row index (global) 4711 [nrows+1:2*nrows+1]: i-structure index 4712 */ 4713 /*-------------------------------------------*/ 4714 nrows = len_si[proc]/2 - 1; 4715 buf_si_i = buf_si + nrows+1; 4716 buf_si[0] = nrows; 4717 buf_si_i[0] = 0; 4718 nrows = 0; 4719 for (i=owners[proc]; i<owners[proc+1]; i++){ 4720 anzi = ai[i+1] - ai[i]; 4721 if (anzi) { 4722 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4723 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4724 nrows++; 4725 } 4726 } 4727 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4728 k++; 4729 buf_si += len_si[proc]; 4730 } 4731 4732 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4733 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4734 4735 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4736 for (i=0; i<merge->nrecv; i++){ 4737 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4738 } 4739 4740 ierr = PetscFree(len_si);CHKERRQ(ierr); 4741 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4742 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4743 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4744 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4745 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4746 ierr = PetscFree(status);CHKERRQ(ierr); 4747 4748 /* compute a local seq matrix in each processor */ 4749 /*----------------------------------------------*/ 4750 /* allocate bi array and free space for accumulating nonzero column info */ 4751 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4752 bi[0] = 0; 4753 4754 /* create and initialize a linked list */ 4755 nlnk = N+1; 4756 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4757 4758 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4759 len = 0; 4760 len = ai[owners[rank+1]] - ai[owners[rank]]; 4761 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4762 current_space = free_space; 4763 4764 /* determine 
symbolic info for each local row */ 4765 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4766 4767 for (k=0; k<merge->nrecv; k++){ 4768 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4769 nrows = *buf_ri_k[k]; 4770 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4771 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4772 } 4773 4774 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4775 len = 0; 4776 for (i=0;i<m;i++) { 4777 bnzi = 0; 4778 /* add local non-zero cols of this proc's seqmat into lnk */ 4779 arow = owners[rank] + i; 4780 anzi = ai[arow+1] - ai[arow]; 4781 aj = a->j + ai[arow]; 4782 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4783 bnzi += nlnk; 4784 /* add received col data into lnk */ 4785 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4786 if (i == *nextrow[k]) { /* i-th row */ 4787 anzi = *(nextai[k]+1) - *nextai[k]; 4788 aj = buf_rj[k] + *nextai[k]; 4789 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4790 bnzi += nlnk; 4791 nextrow[k]++; nextai[k]++; 4792 } 4793 } 4794 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4795 4796 /* if free space is not available, make more free space */ 4797 if (current_space->local_remaining<bnzi) { 4798 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4799 nspacedouble++; 4800 } 4801 /* copy data into free space, then initialize lnk */ 4802 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4803 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4804 4805 current_space->array += bnzi; 4806 current_space->local_used += bnzi; 4807 current_space->local_remaining -= bnzi; 4808 4809 bi[i+1] = bi[i] + bnzi; 4810 } 4811 4812 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4813 4814 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4815 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4816 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4817 4818 /* create symbolic parallel matrix B_mpi */ 4819 /*---------------------------------------*/ 4820 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4821 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4822 if (n==PETSC_DECIDE) { 4823 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4824 } else { 4825 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4826 } 4827 ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr); 4828 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4829 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4830 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4831 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4832 4833 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4834 B_mpi->assembled = PETSC_FALSE; 4835 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4836 merge->bi = bi; 4837 merge->bj = bj; 4838 merge->buf_ri = buf_ri; 4839 merge->buf_rj = buf_rj; 4840 merge->coi = PETSC_NULL; 4841 merge->coj = PETSC_NULL; 4842 merge->owners_co = PETSC_NULL; 4843 4844 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4845 4846 /* attach the supporting struct to B_mpi for reuse */ 4847 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4848 ierr = 
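/* PetscContainerDestroy() below only drops this local reference; the reference taken by
     PetscObjectCompose() keeps the container (and hence merge) alive until
     MatDestroy_MPIAIJ_SeqsToMPI() clears the composition and frees merge. */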
PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4849 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4850 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4851 *mpimat = B_mpi; 4852 4853 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4854 PetscFunctionReturn(0); 4855 } 4856 4857 #undef __FUNCT__ 4858 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4859 /*@C 4860 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4861 matrices from each processor 4862 4863 Collective on MPI_Comm 4864 4865 Input Parameters: 4866 + comm - the communicators the parallel matrix will live on 4867 . seqmat - the input sequential matrices 4868 . m - number of local rows (or PETSC_DECIDE) 4869 . n - number of local columns (or PETSC_DECIDE) 4870 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4871 4872 Output Parameter: 4873 . mpimat - the parallel matrix generated 4874 4875 Level: advanced 4876 4877 Notes: 4878 The dimensions of the sequential matrix in each processor MUST be the same. 4879 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4880 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4881 @*/ 4882 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4883 { 4884 PetscErrorCode ierr; 4885 PetscMPIInt size; 4886 4887 PetscFunctionBegin; 4888 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4889 if (size == 1){ 4890 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4891 if (scall == MAT_INITIAL_MATRIX){ 4892 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4893 } else { 4894 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4895 } 4896 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4897 PetscFunctionReturn(0); 4898 } 4899 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4900 if (scall == MAT_INITIAL_MATRIX){ 4901 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4902 } 4903 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4904 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4905 PetscFunctionReturn(0); 4906 } 4907 4908 #undef __FUNCT__ 4909 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4910 /*@ 4911 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4912 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4913 with MatGetSize() 4914 4915 Not Collective 4916 4917 Input Parameters: 4918 + A - the matrix 4919 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4920 4921 Output Parameter: 4922 . 
A_loc - the local sequential matrix generated 4923 4924 Level: developer 4925 4926 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4927 4928 @*/ 4929 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4930 { 4931 PetscErrorCode ierr; 4932 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4933 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4934 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4935 MatScalar *aa=a->a,*ba=b->a,*cam; 4936 PetscScalar *ca; 4937 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4938 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4939 PetscBool match; 4940 4941 PetscFunctionBegin; 4942 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4943 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4944 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4945 if (scall == MAT_INITIAL_MATRIX){ 4946 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4947 ci[0] = 0; 4948 for (i=0; i<am; i++){ 4949 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4950 } 4951 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4952 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4953 k = 0; 4954 for (i=0; i<am; i++) { 4955 ncols_o = bi[i+1] - bi[i]; 4956 ncols_d = ai[i+1] - ai[i]; 4957 /* off-diagonal portion of A */ 4958 for (jo=0; jo<ncols_o; jo++) { 4959 col = cmap[*bj]; 4960 if (col >= cstart) break; 4961 cj[k] = col; bj++; 4962 ca[k++] = *ba++; 4963 } 4964 /* diagonal portion of A */ 4965 for (j=0; j<ncols_d; j++) { 4966 cj[k] = cstart + *aj++; 4967 ca[k++] = *aa++; 4968 } 4969 /* off-diagonal portion of A */ 4970 for (j=jo; j<ncols_o; j++) { 4971 cj[k] = cmap[*bj++]; 4972 ca[k++] = *ba++; 4973 } 4974 } 4975 /* put together the new matrix */ 4976 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4977 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4978 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4979 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4980 mat->free_a = PETSC_TRUE; 4981 mat->free_ij = PETSC_TRUE; 4982 mat->nonew = 0; 4983 } else if (scall == MAT_REUSE_MATRIX){ 4984 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4985 ci = mat->i; cj = mat->j; cam = mat->a; 4986 for (i=0; i<am; i++) { 4987 /* off-diagonal portion of A */ 4988 ncols_o = bi[i+1] - bi[i]; 4989 for (jo=0; jo<ncols_o; jo++) { 4990 col = cmap[*bj]; 4991 if (col >= cstart) break; 4992 *cam++ = *ba++; bj++; 4993 } 4994 /* diagonal portion of A */ 4995 ncols_d = ai[i+1] - ai[i]; 4996 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4997 /* off-diagonal portion of A */ 4998 for (j=jo; j<ncols_o; j++) { 4999 *cam++ = *ba++; bj++; 5000 } 5001 } 5002 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5003 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5004 PetscFunctionReturn(0); 5005 } 5006 5007 #undef __FUNCT__ 5008 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5009 /*@C 5010 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5011 5012 Not Collective 5013 5014 Input Parameters: 5015 + A - the matrix 5016 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5017 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5018 5019 Output Parameter: 5020 . 
A_loc - the local sequential matrix generated 5021 5022 Level: developer 5023 5024 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5025 5026 @*/ 5027 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5028 { 5029 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5030 PetscErrorCode ierr; 5031 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5032 IS isrowa,iscola; 5033 Mat *aloc; 5034 PetscBool match; 5035 5036 PetscFunctionBegin; 5037 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5038 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5039 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5040 if (!row){ 5041 start = A->rmap->rstart; end = A->rmap->rend; 5042 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5043 } else { 5044 isrowa = *row; 5045 } 5046 if (!col){ 5047 start = A->cmap->rstart; 5048 cmap = a->garray; 5049 nzA = a->A->cmap->n; 5050 nzB = a->B->cmap->n; 5051 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5052 ncols = 0; 5053 for (i=0; i<nzB; i++) { 5054 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5055 else break; 5056 } 5057 imark = i; 5058 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5059 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5060 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5061 } else { 5062 iscola = *col; 5063 } 5064 if (scall != MAT_INITIAL_MATRIX){ 5065 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5066 aloc[0] = *A_loc; 5067 } 5068 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5069 *A_loc = aloc[0]; 5070 ierr = PetscFree(aloc);CHKERRQ(ierr); 5071 if (!row){ 5072 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5073 } 5074 if (!col){ 5075 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5076 } 5077 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5078 PetscFunctionReturn(0); 5079 } 5080 5081 #undef __FUNCT__ 5082 #define __FUNCT__ "MatGetBrowsOfAcols" 5083 /*@C 5084 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5085 5086 Collective on Mat 5087 5088 Input Parameters: 5089 + A,B - the matrices in mpiaij format 5090 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5091 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5092 5093 Output Parameter: 5094 + rowb, colb - index sets of rows and columns of B to extract 5095 - B_seq - the sequential matrix generated 5096 5097 Level: developer 5098 5099 @*/ 5100 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5101 { 5102 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5103 PetscErrorCode ierr; 5104 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5105 IS isrowb,iscolb; 5106 Mat *bseq=PETSC_NULL; 5107 5108 PetscFunctionBegin; 5109 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5110 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5111 } 5112 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5113 5114 if (scall == MAT_INITIAL_MATRIX){ 5115 start = A->cmap->rstart; 5116 cmap = a->garray; 5117 nzA = a->A->cmap->n; 5118 nzB = a->B->cmap->n; 5119 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5120 ncols = 0; 5121 for (i=0; i<nzB; i++) { /* row < local row index */ 5122 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5123 else break; 5124 } 5125 imark = i; 5126 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5127 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5128 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5129 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5130 } else { 5131 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5132 isrowb = *rowb; iscolb = *colb; 5133 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5134 bseq[0] = *B_seq; 5135 } 5136 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5137 *B_seq = bseq[0]; 5138 ierr = PetscFree(bseq);CHKERRQ(ierr); 5139 if (!rowb){ 5140 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5141 } else { 5142 *rowb = isrowb; 5143 } 5144 if (!colb){ 5145 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5146 } else { 5147 *colb = iscolb; 5148 } 5149 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5150 PetscFunctionReturn(0); 5151 } 5152 5153 #undef __FUNCT__ 5154 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5155 /* 5156 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5157 of the OFF-DIAGONAL portion of local A 5158 5159 Collective on Mat 5160 5161 Input Parameters: 5162 + A,B - the matrices in mpiaij format 5163 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5164 5165 Output Parameter: 5166 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5167 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5168 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5169 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5170 5171 Level: developer 5172 5173 */ 5174 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5175 { 5176 VecScatter_MPI_General *gen_to,*gen_from; 5177 PetscErrorCode ierr; 5178 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5179 Mat_SeqAIJ *b_oth; 5180 VecScatter ctx=a->Mvctx; 5181 MPI_Comm comm=((PetscObject)ctx)->comm; 5182 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5183 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5184 PetscScalar *rvalues,*svalues; 5185 MatScalar *b_otha,*bufa,*bufA; 5186 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5187 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5188 MPI_Status *sstatus,rstatus; 5189 PetscMPIInt jj; 5190 PetscInt *cols,sbs,rbs; 5191 PetscScalar *vals; 5192 5193 PetscFunctionBegin; 5194 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5195 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5196 } 5197 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5198 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5199 5200 gen_to = (VecScatter_MPI_General*)ctx->todata; 5201 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5202 rvalues = gen_from->values; /* holds the length of receiving row */ 5203 svalues = gen_to->values; /* holds the length of sending row */ 5204 nrecvs = gen_from->n; 5205 nsends = gen_to->n; 5206 5207 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5208 srow = gen_to->indices; /* local row index to be sent */ 5209 sstarts = gen_to->starts; 5210 sprocs = gen_to->procs; 5211 sstatus = gen_to->sstatus; 5212 sbs = gen_to->bs; 5213 rstarts = gen_from->starts; 5214 rprocs = gen_from->procs; 5215 rbs = gen_from->bs; 5216 5217 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5218 if (scall == MAT_INITIAL_MATRIX){ 5219 /* i-array */ 5220 /*---------*/ 5221 /* post receives */ 5222 for (i=0; i<nrecvs; i++){ 5223 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5224 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5225 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5226 } 5227 5228 /* pack the outgoing message */ 5229 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5230 sstartsj[0] = 0; rstartsj[0] = 0; 5231 len = 0; /* total length of j or a array to be sent */ 5232 k = 0; 5233 for (i=0; i<nsends; i++){ 5234 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5235 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5236 for (j=0; j<nrows; j++) { 5237 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5238 for (l=0; l<sbs; l++){ 5239 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5240 rowlen[j*sbs+l] = ncols; 5241 len += ncols; 5242 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5243 } 5244 k++; 5245 } 5246 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5247 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5248 } 5249 /* 
recvs and sends of i-array are completed */ 5250 i = nrecvs; 5251 while (i--) { 5252 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5253 } 5254 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5255 5256 /* allocate buffers for sending j and a arrays */ 5257 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5258 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5259 5260 /* create i-array of B_oth */ 5261 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5262 b_othi[0] = 0; 5263 len = 0; /* total length of j or a array to be received */ 5264 k = 0; 5265 for (i=0; i<nrecvs; i++){ 5266 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5267 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5268 for (j=0; j<nrows; j++) { 5269 b_othi[k+1] = b_othi[k] + rowlen[j]; 5270 len += rowlen[j]; k++; 5271 } 5272 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5273 } 5274 5275 /* allocate space for j and a arrrays of B_oth */ 5276 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5277 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5278 5279 /* j-array */ 5280 /*---------*/ 5281 /* post receives of j-array */ 5282 for (i=0; i<nrecvs; i++){ 5283 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5284 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5285 } 5286 5287 /* pack the outgoing message j-array */ 5288 k = 0; 5289 for (i=0; i<nsends; i++){ 5290 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5291 bufJ = bufj+sstartsj[i]; 5292 for (j=0; j<nrows; j++) { 5293 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5294 for (ll=0; ll<sbs; ll++){ 5295 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5296 for (l=0; l<ncols; l++){ 5297 *bufJ++ = cols[l]; 5298 } 5299 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5300 } 5301 } 5302 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5303 } 5304 5305 /* recvs and sends of j-array are completed */ 5306 i = nrecvs; 5307 while (i--) { 5308 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5309 } 5310 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5311 } else if (scall == MAT_REUSE_MATRIX){ 5312 sstartsj = *startsj_s; 5313 rstartsj = *startsj_r; 5314 bufa = *bufa_ptr; 5315 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5316 b_otha = b_oth->a; 5317 } else { 5318 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5319 } 5320 5321 /* a-array */ 5322 /*---------*/ 5323 /* post receives of a-array */ 5324 for (i=0; i<nrecvs; i++){ 5325 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5326 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5327 } 5328 5329 /* pack the outgoing message a-array */ 5330 k = 0; 5331 for (i=0; i<nsends; i++){ 5332 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5333 bufA = bufa+sstartsj[i]; 5334 for (j=0; j<nrows; j++) { 5335 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5336 for (ll=0; ll<sbs; ll++){ 5337 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5338 for (l=0; l<ncols; l++){ 5339 *bufA++ = vals[l]; 5340 } 5341 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5342 } 5343 } 5344 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5345 } 5346 /* recvs and sends of a-array are completed */ 5347 i = nrecvs; 5348 while (i--) { 5349 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5350 } 5351 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5352 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5353 5354 if (scall == MAT_INITIAL_MATRIX){ 5355 /* put together the new matrix */ 5356 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5357 5358 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5359 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5360 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5361 b_oth->free_a = PETSC_TRUE; 5362 b_oth->free_ij = PETSC_TRUE; 5363 b_oth->nonew = 0; 5364 5365 ierr = PetscFree(bufj);CHKERRQ(ierr); 5366 if (!startsj_s || !bufa_ptr){ 5367 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5368 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5369 } else { 5370 *startsj_s = sstartsj; 5371 *startsj_r = rstartsj; 5372 *bufa_ptr = bufa; 5373 } 5374 } 5375 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5376 PetscFunctionReturn(0); 5377 } 5378 5379 #undef __FUNCT__ 5380 #define __FUNCT__ "MatGetCommunicationStructs" 5381 /*@C 5382 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5383 5384 Not Collective 5385 5386 Input Parameters: 5387 . A - The matrix in mpiaij format 5388 5389 Output Parameter: 5390 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5391 . 
colmap - A map from global column index to local index into lvec 5392 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5393 5394 Level: developer 5395 5396 @*/ 5397 #if defined (PETSC_USE_CTABLE) 5398 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5399 #else 5400 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5401 #endif 5402 { 5403 Mat_MPIAIJ *a; 5404 5405 PetscFunctionBegin; 5406 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5407 PetscValidPointer(lvec, 2); 5408 PetscValidPointer(colmap, 3); 5409 PetscValidPointer(multScatter, 4); 5410 a = (Mat_MPIAIJ *) A->data; 5411 if (lvec) *lvec = a->lvec; 5412 if (colmap) *colmap = a->colmap; 5413 if (multScatter) *multScatter = a->Mvctx; 5414 PetscFunctionReturn(0); 5415 } 5416 5417 EXTERN_C_BEGIN 5418 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5419 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5420 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5421 EXTERN_C_END 5422 5423 #undef __FUNCT__ 5424 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5425 /* 5426 Computes (B'*A')' since computing B*A directly is untenable 5427 5428 n p p 5429 ( ) ( ) ( ) 5430 m ( A ) * n ( B ) = m ( C ) 5431 ( ) ( ) ( ) 5432 5433 */ 5434 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5435 { 5436 PetscErrorCode ierr; 5437 Mat At,Bt,Ct; 5438 5439 PetscFunctionBegin; 5440 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5441 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5442 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5443 ierr = MatDestroy(&At);CHKERRQ(ierr); 5444 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5445 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5446 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5447 PetscFunctionReturn(0); 5448 } 5449 5450 #undef __FUNCT__ 5451 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5452 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5453 { 5454 PetscErrorCode ierr; 5455 PetscInt m=A->rmap->n,n=B->cmap->n; 5456 Mat Cmat; 5457 5458 PetscFunctionBegin; 5459 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5460 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5461 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5462 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5463 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5464 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5465 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5466 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5467 *C = Cmat; 5468 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5469 PetscFunctionReturn(0); 5470 } 5471 5472 /* ----------------------------------------------------------------*/ 5473 #undef __FUNCT__ 5474 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5475 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5476 { 5477 PetscErrorCode ierr; 5478 5479 PetscFunctionBegin; 5480 if (scall == MAT_INITIAL_MATRIX){ 5481 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5482 } 5483 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
5484 PetscFunctionReturn(0); 5485 } 5486 5487 EXTERN_C_BEGIN 5488 #if defined(PETSC_HAVE_MUMPS) 5489 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5490 #endif 5491 #if defined(PETSC_HAVE_PASTIX) 5492 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5493 #endif 5494 #if defined(PETSC_HAVE_SUPERLU_DIST) 5495 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5496 #endif 5497 #if defined(PETSC_HAVE_SPOOLES) 5498 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5499 #endif 5500 EXTERN_C_END 5501 5502 /*MC 5503 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5504 5505 Options Database Keys: 5506 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5507 5508 Level: beginner 5509 5510 .seealso: MatCreateAIJ() 5511 M*/ 5512 5513 EXTERN_C_BEGIN 5514 #undef __FUNCT__ 5515 #define __FUNCT__ "MatCreate_MPIAIJ" 5516 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5517 { 5518 Mat_MPIAIJ *b; 5519 PetscErrorCode ierr; 5520 PetscMPIInt size; 5521 5522 PetscFunctionBegin; 5523 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5524 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5525 B->data = (void*)b; 5526 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5527 B->assembled = PETSC_FALSE; 5528 B->insertmode = NOT_SET_VALUES; 5529 b->size = size; 5530 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5531 5532 /* build cache for off array entries formed */ 5533 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5534 b->donotstash = PETSC_FALSE; 5535 b->colmap = 0; 5536 b->garray = 0; 5537 b->roworiented = PETSC_TRUE; 5538 5539 /* stuff used for matrix vector multiply */ 5540 b->lvec = PETSC_NULL; 5541 b->Mvctx = PETSC_NULL; 5542 5543 /* stuff for MatGetRow() */ 5544 b->rowindices = 0; 5545 b->rowvalues = 0; 5546 b->getrowactive = PETSC_FALSE; 5547 5548 /* flexible pointer used in CUSP/CUSPARSE classes */ 5549 b->spptr = PETSC_NULL; 5550 #if defined(PETSC_HAVE_SPOOLES) 5551 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5552 "MatGetFactor_mpiaij_spooles", 5553 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5554 #endif 5555 #if defined(PETSC_HAVE_MUMPS) 5556 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5557 "MatGetFactor_aij_mumps", 5558 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5559 #endif 5560 #if defined(PETSC_HAVE_PASTIX) 5561 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5562 "MatGetFactor_mpiaij_pastix", 5563 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5564 #endif 5565 #if defined(PETSC_HAVE_SUPERLU_DIST) 5566 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5567 "MatGetFactor_mpiaij_superlu_dist", 5568 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5569 #endif 5570 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5571 "MatStoreValues_MPIAIJ", 5572 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5573 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5574 "MatRetrieveValues_MPIAIJ", 5575 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5576 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5577 "MatGetDiagonalBlock_MPIAIJ", 5578 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5579 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5580 "MatIsTranspose_MPIAIJ", 5581 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5582 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5583 "MatMPIAIJSetPreallocation_MPIAIJ", 5584 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5585 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5586 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5587 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5588 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5589 "MatDiagonalScaleLocal_MPIAIJ", 5590 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5591 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5592 "MatConvert_MPIAIJ_MPIAIJPERM", 5593 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5594 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5595 "MatConvert_MPIAIJ_MPIAIJCRL", 5596 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5597 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5598 "MatConvert_MPIAIJ_MPISBAIJ", 5599 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5600 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5601 "MatMatMult_MPIDense_MPIAIJ", 5602 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5603 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5604 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5605 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5606 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5607 "MatMatMultNumeric_MPIDense_MPIAIJ", 5608 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5609 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5610 PetscFunctionReturn(0); 5611 } 5612 EXTERN_C_END 5613 5614 #undef __FUNCT__ 5615 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5616 /*@ 5617 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5618 and "off-diagonal" part of the matrix in CSR format. 5619 5620 Collective on MPI_Comm 5621 5622 Input Parameters: 5623 + comm - MPI communicator 5624 . m - number of local rows (Cannot be PETSC_DECIDE) 5625 . n - This value should be the same as the local size used in creating the 5626 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5627 calculated if N is given) For square matrices n is almost always m. 5628 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5629 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5630 . i - row indices for "diagonal" portion of matrix 5631 . j - column indices 5632 . a - matrix values 5633 . oi - row indices for "off-diagonal" portion of matrix 5634 . oj - column indices 5635 - oa - matrix values 5636 5637 Output Parameter: 5638 . mat - the matrix 5639 5640 Level: advanced 5641 5642 Notes: 5643 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5644 must free the arrays once the matrix has been destroyed and not before. 5645 5646 The i and j indices are 0 based 5647 5648 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5649 5650 This sets local rows and cannot be used to set off-processor values. 
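   As a minimal sketch (the 2 by 2 matrix and the array values below are purely illustrative),
   consider
       2  -1
      -1   2
   laid out on two processes, each owning one row and one column. The j indices of the "diagonal"
   block are local to that block, while the oj indices of the "off-diagonal" block are given as
   global column indices, consistent with the global column count used for that block in the code
   below.

      PetscInt    i[2] = {0,1}, j[1] = {0}, oi[2] = {0,1}, oj[1];
      PetscScalar a[1] = {2.0}, oa[1] = {-1.0};
      PetscMPIInt rank;
      Mat         A;

      ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      oj[0] = rank ? 0 : 1;
      ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ...   use A (for example with MatMult()), call MatDestroy(&A), and only then free or reuse the six arrays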
5651 5652 You cannot later use MatSetValues() to change values in this matrix. 5653 5654 .keywords: matrix, aij, compressed row, sparse, parallel 5655 5656 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5657 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5658 @*/ 5659 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5660 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5661 { 5662 PetscErrorCode ierr; 5663 Mat_MPIAIJ *maij; 5664 5665 PetscFunctionBegin; 5666 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5667 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5668 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5669 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5670 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5671 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5672 maij = (Mat_MPIAIJ*) (*mat)->data; 5673 maij->donotstash = PETSC_TRUE; 5674 (*mat)->preallocated = PETSC_TRUE; 5675 5676 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5677 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5678 5679 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5680 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5681 5682 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5683 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5684 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5685 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5686 5687 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5688 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5689 PetscFunctionReturn(0); 5690 } 5691 5692 /* 5693 Special version for direct calls from Fortran 5694 */ 5695 #include <petsc-private/fortranimpl.h> 5696 5697 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5698 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5699 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5700 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5701 #endif 5702 5703 /* Change these macros so can be used in void function */ 5704 #undef CHKERRQ 5705 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5706 #undef SETERRQ2 5707 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5708 #undef SETERRQ3 5709 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5710 #undef SETERRQ 5711 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5712 5713 EXTERN_C_BEGIN 5714 #undef __FUNCT__ 5715 #define __FUNCT__ "matsetvaluesmpiaij_" 5716 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5717 { 5718 Mat mat = *mmat; 5719 PetscInt m = *mm, n = *mn; 5720 InsertMode addv = *maddv; 5721 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5722 PetscScalar value; 5723 PetscErrorCode ierr; 5724 5725 MatCheckPreallocated(mat,1); 5726 if (mat->insertmode == NOT_SET_VALUES) { 5727 mat->insertmode = addv; 5728 } 5729 #if defined(PETSC_USE_DEBUG) 5730 else if (mat->insertmode != addv) { 5731 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5732 } 5733 #endif 5734 { 5735 PetscInt 
i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5736 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5737 PetscBool roworiented = aij->roworiented; 5738 5739 /* Some Variables required in the macro */ 5740 Mat A = aij->A; 5741 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5742 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5743 MatScalar *aa = a->a; 5744 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5745 Mat B = aij->B; 5746 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5747 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5748 MatScalar *ba = b->a; 5749 5750 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5751 PetscInt nonew = a->nonew; 5752 MatScalar *ap1,*ap2; 5753 5754 PetscFunctionBegin; 5755 for (i=0; i<m; i++) { 5756 if (im[i] < 0) continue; 5757 #if defined(PETSC_USE_DEBUG) 5758 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5759 #endif 5760 if (im[i] >= rstart && im[i] < rend) { 5761 row = im[i] - rstart; 5762 lastcol1 = -1; 5763 rp1 = aj + ai[row]; 5764 ap1 = aa + ai[row]; 5765 rmax1 = aimax[row]; 5766 nrow1 = ailen[row]; 5767 low1 = 0; 5768 high1 = nrow1; 5769 lastcol2 = -1; 5770 rp2 = bj + bi[row]; 5771 ap2 = ba + bi[row]; 5772 rmax2 = bimax[row]; 5773 nrow2 = bilen[row]; 5774 low2 = 0; 5775 high2 = nrow2; 5776 5777 for (j=0; j<n; j++) { 5778 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5779 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5780 if (in[j] >= cstart && in[j] < cend){ 5781 col = in[j] - cstart; 5782 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5783 } else if (in[j] < 0) continue; 5784 #if defined(PETSC_USE_DEBUG) 5785 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5786 #endif 5787 else { 5788 if (mat->was_assembled) { 5789 if (!aij->colmap) { 5790 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5791 } 5792 #if defined (PETSC_USE_CTABLE) 5793 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5794 col--; 5795 #else 5796 col = aij->colmap[in[j]] - 1; 5797 #endif 5798 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5799 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5800 col = in[j]; 5801 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5802 B = aij->B; 5803 b = (Mat_SeqAIJ*)B->data; 5804 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5805 rp2 = bj + bi[row]; 5806 ap2 = ba + bi[row]; 5807 rmax2 = bimax[row]; 5808 nrow2 = bilen[row]; 5809 low2 = 0; 5810 high2 = nrow2; 5811 bm = aij->B->rmap->n; 5812 ba = b->a; 5813 } 5814 } else col = in[j]; 5815 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5816 } 5817 } 5818 } else { 5819 if (!aij->donotstash) { 5820 if (roworiented) { 5821 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5822 } else { 5823 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5824 } 5825 } 5826 } 5827 }} 5828 PetscFunctionReturnVoid(); 5829 } 5830 EXTERN_C_END 5831 5832