1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.49 1996/06/24 19:54:53 bsmith Exp balay $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "src/vec/vecimpl.h" 12 #include "src/inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = 0; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr); 45 ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 46 PetscFree(ia); PetscFree(ja); 47 } else { 48 if (ishift == oshift) { 49 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 50 } 51 else if (ishift == -1) { 52 /* temporarily subtract 1 from i and j indices */ 53 int nz = a->i[n] - 1; 54 for ( i=0; i<nz; i++ ) a->j[i]--; 55 for ( i=0; i<n+1; i++ ) a->i[i]--; 56 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 57 for ( i=0; i<nz; i++ ) a->j[i]++; 58 for ( i=0; i<n+1; i++ ) a->i[i]++; 59 } else { 60 /* temporarily add 1 to i and j indices */ 61 int nz = a->i[n] - 1; 62 for ( i=0; i<nz; i++ ) a->j[i]++; 63 for ( i=0; i<n+1; i++ ) a->i[i]++; 64 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 65 for ( i=0; i<nz; i++ ) a->j[i]--; 66 for ( i=0; i<n+1; i++ ) a->i[i]--; 67 } 68 } 69 return 0; 70 } 71 72 /* 73 Adds diagonal pointers to sparse matrix structure. 74 */ 75 76 int MatMarkDiag_SeqBAIJ(Mat A) 77 { 78 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 79 int i,j, *diag, m = a->mbs; 80 81 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 82 PLogObjectMemory(A,(m+1)*sizeof(int)); 83 for ( i=0; i<m; i++ ) { 84 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 85 if (a->j[j] == i) { 86 diag[i] = j; 87 break; 88 } 89 } 90 } 91 a->diag = diag; 92 return 0; 93 } 94 95 #include "draw.h" 96 #include "pinclude/pviewer.h" 97 #include "sys.h" 98 99 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 100 { 101 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 102 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 103 Scalar *aa; 104 105 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 106 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 107 col_lens[0] = MAT_COOKIE; 108 col_lens[1] = a->m; 109 col_lens[2] = a->n; 110 col_lens[3] = a->nz*bs2; 111 112 /* store lengths of each row and write (including header) to file */ 113 count = 0; 114 for ( i=0; i<a->mbs; i++ ) { 115 for ( j=0; j<bs; j++ ) { 116 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 117 } 118 } 119 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 120 PetscFree(col_lens); 121 122 /* store column indices (zero start index) */ 123 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 124 count = 0; 125 for ( i=0; i<a->mbs; i++ ) { 126 for ( j=0; j<bs; j++ ) { 127 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 128 for ( l=0; l<bs; l++ ) { 129 jj[count++] = bs*a->j[k] + l; 130 } 131 } 132 } 133 } 134 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 135 PetscFree(jj); 136 137 /* store nonzero values */ 138 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 139 count = 0; 140 for ( i=0; i<a->mbs; i++ ) { 141 for ( j=0; j<bs; j++ ) { 142 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 143 for ( l=0; l<bs; l++ ) { 144 aa[count++] = a->a[bs2*k + l*bs + j]; 145 } 146 } 147 } 148 } 149 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 150 PetscFree(aa); 151 return 0; 152 } 153 154 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 155 { 156 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 157 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 158 FILE *fd; 159 char *outputname; 160 161 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 162 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 163 ierr = ViewerGetFormat(viewer,&format); 164 if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) { 165 fprintf(fd," block size is %d\n",bs); 166 } 167 else if (format == ASCII_FORMAT_MATLAB) { 168 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 169 } 170 else if (format == ASCII_FORMAT_COMMON) { 171 for ( i=0; i<a->mbs; i++ ) { 172 for ( j=0; j<bs; j++ ) { 173 fprintf(fd,"row %d:",i*bs+j); 174 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 175 for ( l=0; l<bs; l++ ) { 176 #if defined(PETSC_COMPLEX) 177 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 178 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 179 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 180 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 181 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 182 #else 183 if (a->a[bs2*k + l*bs + j] != 0.0) 184 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 185 #endif 186 } 187 } 188 fprintf(fd,"\n"); 189 } 190 } 191 } 192 else { 193 for ( i=0; i<a->mbs; i++ ) { 194 for ( j=0; j<bs; j++ ) { 195 fprintf(fd,"row %d:",i*bs+j); 196 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 197 for ( l=0; l<bs; l++ ) { 198 #if defined(PETSC_COMPLEX) 199 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 200 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 201 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 202 } 203 else { 204 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 205 } 206 #else 207 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 208 #endif 209 } 210 } 211 fprintf(fd,"\n"); 212 } 213 } 214 } 215 fflush(fd); 216 return 0; 217 } 218 219 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 220 { 221 Mat A = (Mat) obj; 222 ViewerType vtype; 223 int ierr; 224 225 if (!viewer) { 226 viewer = STDOUT_VIEWER_SELF; 227 } 228 229 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 230 if (vtype == MATLAB_VIEWER) { 231 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 232 } 233 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 234 return MatView_SeqBAIJ_ASCII(A,viewer); 235 } 236 else if (vtype == BINARY_FILE_VIEWER) { 237 return MatView_SeqBAIJ_Binary(A,viewer); 238 } 239 else if (vtype == DRAW_VIEWER) { 240 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 241 } 242 return 0; 243 } 244 245 #define CHUNKSIZE 10 246 247 /* This version has row oriented v */ 248 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 249 { 250 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 251 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 252 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 253 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 254 int ridx,cidx,bs2=a->bs2; 255 Scalar *ap,value,*aa=a->a,*bap; 256 257 for ( k=0; k<m; k++ ) { /* loop over added rows */ 258 row = im[k]; brow = row/bs; 259 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 260 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 261 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 262 rmax = imax[brow]; nrow = ailen[brow]; 263 low = 0; 264 for ( l=0; l<n; l++ ) { /* loop over added columns */ 265 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 266 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 267 col = in[l]; bcol = col/bs; 268 ridx = row % bs; cidx = col % bs; 269 if (roworiented) { 270 value = *v++; 271 } 272 else { 273 value = v[k + l*m]; 274 } 275 if (!sorted) low = 0; high = nrow; 276 while (high-low > 5) { 277 t = (low+high)/2; 278 if (rp[t] > bcol) high = t; 279 else low = t; 280 } 281 for ( i=low; i<high; i++ ) { 282 if (rp[i] > bcol) break; 283 if (rp[i] == bcol) { 284 bap = ap + bs2*i + bs*cidx + ridx; 285 if (is == ADD_VALUES) *bap += value; 286 else *bap = value; 287 goto noinsert; 288 } 289 } 290 if (nonew) goto noinsert; 291 if (nrow >= rmax) { 292 /* there is no extra room in row, therefore enlarge */ 293 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 294 Scalar *new_a; 295 296 /* malloc new storage space */ 297 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 298 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 299 new_j = (int *) (new_a + bs2*new_nz); 300 new_i = new_j + new_nz; 301 302 /* copy over old data into new slots */ 303 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 304 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 305 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 306 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 307 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 308 len*sizeof(int)); 309 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 310 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 311 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 312 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 313 /* free up old matrix storage */ 314 PetscFree(a->a); 315 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 316 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 317 a->singlemalloc = 1; 318 319 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 320 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 321 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 322 a->maxnz += bs2*CHUNKSIZE; 323 a->reallocs++; 324 a->nz++; 325 } 326 N = nrow++ - 1; 327 /* shift up all the later entries in this row */ 328 for ( ii=N; ii>=i; ii-- ) { 329 rp[ii+1] = rp[ii]; 330 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 331 } 332 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 333 rp[i] = bcol; 334 ap[bs2*i + bs*cidx + ridx] = value; 335 noinsert:; 336 low = i; 337 } 338 ailen[brow] = nrow; 339 } 340 return 0; 341 } 342 343 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 344 { 345 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 346 *m = a->m; *n = a->n; 347 return 0; 348 } 349 350 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 351 { 352 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 353 *m = 0; *n = a->m; 354 return 0; 355 } 356 357 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 358 { 359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 360 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 361 Scalar *aa,*v_i,*aa_i; 362 363 bs = a->bs; 364 ai = a->i; 365 aj = a->j; 366 aa = a->a; 367 bs2 = a->bs2; 368 369 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 370 371 bn = row/bs; /* Block number */ 372 bp = row % bs; /* Block Position */ 373 M = ai[bn+1] - ai[bn]; 374 *nz = bs*M; 375 376 if (v) { 377 *v = 0; 378 if (*nz) { 379 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 380 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 381 v_i = *v + i*bs; 382 aa_i = aa + bs2*(ai[bn] + i); 383 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 384 } 385 } 386 } 387 388 if (idx) { 389 *idx = 0; 390 if (*nz) { 391 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 392 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 393 idx_i = *idx + i*bs; 394 itmp = bs*aj[ai[bn] + i]; 395 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 396 } 397 } 398 } 399 return 0; 400 } 401 402 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 403 { 404 if (idx) {if (*idx) PetscFree(*idx);} 405 if (v) {if (*v) PetscFree(*v);} 406 return 0; 407 } 408 409 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 410 { 411 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 412 Mat C; 413 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 414 int *rows,*cols,bs2=a->bs2; 415 Scalar *array=a->a; 416 417 if (B==PETSC_NULL && mbs!=nbs) 418 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 419 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 420 PetscMemzero(col,(1+nbs)*sizeof(int)); 421 422 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 423 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 424 PetscFree(col); 425 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 426 cols = rows + bs; 427 for ( i=0; i<mbs; i++ ) { 428 cols[0] = i*bs; 429 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 430 len = ai[i+1] - ai[i]; 431 for ( j=0; j<len; j++ ) { 432 rows[0] = (*aj++)*bs; 433 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 434 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 435 array += bs2; 436 } 437 } 438 PetscFree(rows); 439 440 ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 441 ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 442 443 if (B != PETSC_NULL) { 444 *B = C; 445 } else { 446 /* This isn't really an in-place transpose */ 447 PetscFree(a->a); 448 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 449 if (a->diag) PetscFree(a->diag); 450 if (a->ilen) PetscFree(a->ilen); 451 if (a->imax) PetscFree(a->imax); 452 if (a->solve_work) PetscFree(a->solve_work); 453 PetscFree(a); 454 PetscMemcpy(A,C,sizeof(struct _Mat)); 455 PetscHeaderDestroy(C); 456 } 457 return 0; 458 } 459 460 461 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 462 { 463 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 464 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 465 int m = a->m,*ip, N, *ailen = a->ilen; 466 int mbs = a->mbs, bs2 = a->bs2; 467 Scalar *aa = a->a, *ap; 468 469 if (mode == FLUSH_ASSEMBLY) return 0; 470 471 for ( i=1; i<mbs; i++ ) { 472 /* move each row back by the amount of empty slots (fshift) before it*/ 473 fshift += imax[i-1] - ailen[i-1]; 474 if (fshift) { 475 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 476 N = ailen[i]; 477 for ( j=0; j<N; j++ ) { 478 ip[j-fshift] = ip[j]; 479 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 480 } 481 } 482 ai[i] = ai[i-1] + ailen[i-1]; 483 } 484 if (mbs) { 485 fshift += imax[mbs-1] - ailen[mbs-1]; 486 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 487 } 488 /* reset ilen and imax for each row */ 489 for ( i=0; i<mbs; i++ ) { 490 ailen[i] = imax[i] = ai[i+1] - ai[i]; 491 } 492 a->nz = ai[mbs]; 493 494 /* diagonals may have moved, so kill the diagonal pointers */ 495 if (fshift && a->diag) { 496 PetscFree(a->diag); 497 PLogObjectMemory(A,-(m+1)*sizeof(int)); 498 a->diag = 0; 499 } 500 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space (blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs); 501 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n", 502 a->reallocs); 503 return 0; 504 } 505 506 static int MatZeroEntries_SeqBAIJ(Mat A) 507 { 508 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 509 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 510 return 0; 511 } 512 513 int MatDestroy_SeqBAIJ(PetscObject obj) 514 { 515 Mat A = (Mat) obj; 516 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 517 518 #if defined(PETSC_LOG) 519 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 520 #endif 521 PetscFree(a->a); 522 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 523 if (a->diag) PetscFree(a->diag); 524 if (a->ilen) PetscFree(a->ilen); 525 if (a->imax) PetscFree(a->imax); 526 if (a->solve_work) PetscFree(a->solve_work); 527 if (a->mult_work) PetscFree(a->mult_work); 528 PetscFree(a); 529 PLogObjectDestroy(A); 530 PetscHeaderDestroy(A); 531 return 0; 532 } 533 534 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 535 { 536 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 537 if (op == ROW_ORIENTED) a->roworiented = 1; 538 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 539 else if (op == COLUMNS_SORTED) a->sorted = 1; 540 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 541 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 542 else if (op == ROWS_SORTED || 543 op == SYMMETRIC_MATRIX || 544 op == STRUCTURALLY_SYMMETRIC_MATRIX || 545 op == YES_NEW_DIAGONALS) 546 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 547 else if (op == NO_NEW_DIAGONALS) 548 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 549 else 550 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 551 return 0; 552 } 553 554 555 /* -------------------------------------------------------*/ 556 /* Should check that shapes of vectors and matrices match */ 557 /* -------------------------------------------------------*/ 558 #include "pinclude/plapack.h" 559 560 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 561 { 562 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 563 Scalar *xg,*zg; 564 register Scalar *x,*z,*v,sum; 565 int mbs=a->mbs,i,*idx,*ii,n,ierr; 566 567 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 568 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 569 570 idx = a->j; 571 v = a->a; 572 ii = a->i; 573 574 for ( i=0; i<mbs; i++ ) { 575 n = ii[1] - ii[0]; ii++; 576 sum = 0.0; 577 while (n--) sum += *v++ * x[*idx++]; 578 z[i] = sum; 579 } 580 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 581 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 582 PLogFlops(2*a->nz - a->m); 583 return 0; 584 } 585 586 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 587 { 588 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 589 Scalar *xg,*zg; 590 register Scalar *x,*z,*v,*xb,sum1,sum2; 591 register Scalar x1,x2; 592 int mbs=a->mbs,i,*idx,*ii; 593 int j,n,ierr; 594 595 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 596 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 597 598 idx = a->j; 599 v = a->a; 600 ii = a->i; 601 602 for ( i=0; i<mbs; i++ ) { 603 n = ii[1] - ii[0]; ii++; 604 sum1 = 0.0; sum2 = 0.0; 605 for ( j=0; j<n; j++ ) { 606 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 607 sum1 += v[0]*x1 + v[2]*x2; 608 sum2 += v[1]*x1 + v[3]*x2; 609 v += 4; 610 } 611 z[0] = sum1; z[1] = sum2; 612 z += 2; 613 } 614 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 615 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 616 PLogFlops(4*a->nz - a->m); 617 return 0; 618 } 619 620 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 621 { 622 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 623 Scalar *xg,*zg; 624 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 625 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 626 627 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 628 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 629 630 idx = a->j; 631 v = a->a; 632 ii = a->i; 633 634 for ( i=0; i<mbs; i++ ) { 635 n = ii[1] - ii[0]; ii++; 636 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 637 for ( j=0; j<n; j++ ) { 638 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 639 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 640 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 641 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 642 v += 9; 643 } 644 z[0] = sum1; z[1] = sum2; z[2] = sum3; 645 z += 3; 646 } 647 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 648 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 649 PLogFlops(18*a->nz - a->m); 650 return 0; 651 } 652 653 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 654 { 655 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 656 Scalar *xg,*zg; 657 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 658 register Scalar x1,x2,x3,x4; 659 int mbs=a->mbs,i,*idx,*ii; 660 int j,n,ierr; 661 662 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 663 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 664 665 idx = a->j; 666 v = a->a; 667 ii = a->i; 668 669 for ( i=0; i<mbs; i++ ) { 670 n = ii[1] - ii[0]; ii++; 671 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 672 for ( j=0; j<n; j++ ) { 673 xb = x + 4*(*idx++); 674 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 675 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 676 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 677 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 678 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 679 v += 16; 680 } 681 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 682 z += 4; 683 } 684 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 685 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 686 PLogFlops(32*a->nz - a->m); 687 return 0; 688 } 689 690 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 691 { 692 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 693 Scalar *xg,*zg; 694 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 695 register Scalar x1,x2,x3,x4,x5; 696 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 697 698 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 699 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 700 701 idx = a->j; 702 v = a->a; 703 ii = a->i; 704 705 for ( i=0; i<mbs; i++ ) { 706 n = ii[1] - ii[0]; ii++; 707 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 708 for ( j=0; j<n; j++ ) { 709 xb = x + 5*(*idx++); 710 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 711 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 712 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 713 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 714 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 715 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 716 v += 25; 717 } 718 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 719 z += 5; 720 } 721 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 722 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 723 PLogFlops(50*a->nz - a->m); 724 return 0; 725 } 726 727 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 728 { 729 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 730 Scalar *xg,*zg; 731 register Scalar *x,*z,*v,*xb; 732 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 733 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt, _DZero = 0.0; 734 735 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 736 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 737 738 idx = a->j; 739 v = a->a; 740 ii = a->i; 741 742 743 if (!a->mult_work) { 744 k = PetscMax(a->m,a->n); 745 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 746 } 747 work = a->mult_work; 748 for ( i=0; i<mbs; i++ ) { 749 n = ii[1] - ii[0]; ii++; 750 ncols = n*bs; 751 workt = work; 752 for ( j=0; j<n; j++ ) { 753 xb = x + bs*(*idx++); 754 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 755 workt += bs; 756 } 757 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 758 v += n*bs2; 759 z += bs; 760 } 761 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 762 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 763 PLogFlops(2*a->nz*bs2 - a->m); 764 return 0; 765 } 766 767 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 768 { 769 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 770 Scalar *xg,*yg,*zg; 771 register Scalar *x,*y,*z,*v,sum; 772 int mbs=a->mbs,i,*idx,*ii,n,ierr; 773 774 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 775 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 776 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 777 778 idx = a->j; 779 v = a->a; 780 ii = a->i; 781 782 for ( i=0; i<mbs; i++ ) { 783 n = ii[1] - ii[0]; ii++; 784 sum = y[i]; 785 while (n--) sum += *v++ * x[*idx++]; 786 z[i] = sum; 787 } 788 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 789 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 790 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 791 PLogFlops(2*a->nz); 792 return 0; 793 } 794 795 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 796 { 797 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 798 Scalar *xg,*yg,*zg; 799 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 800 register Scalar x1,x2; 801 int mbs=a->mbs,i,*idx,*ii; 802 int j,n,ierr; 803 804 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 805 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 806 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 807 808 idx = a->j; 809 v = a->a; 810 ii = a->i; 811 812 for ( i=0; i<mbs; i++ ) { 813 n = ii[1] - ii[0]; ii++; 814 sum1 = y[0]; sum2 = y[1]; 815 for ( j=0; j<n; j++ ) { 816 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 817 sum1 += v[0]*x1 + v[2]*x2; 818 sum2 += v[1]*x1 + v[3]*x2; 819 v += 4; 820 } 821 z[0] = sum1; z[1] = sum2; 822 z += 2; y += 2; 823 } 824 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 825 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 826 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 827 PLogFlops(4*a->nz); 828 return 0; 829 } 830 831 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 832 { 833 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 834 Scalar *xg,*yg,*zg; 835 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 836 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 837 838 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 839 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 840 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 841 842 idx = a->j; 843 v = a->a; 844 ii = a->i; 845 846 for ( i=0; i<mbs; i++ ) { 847 n = ii[1] - ii[0]; ii++; 848 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 849 for ( j=0; j<n; j++ ) { 850 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 851 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 852 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 853 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 854 v += 9; 855 } 856 z[0] = sum1; z[1] = sum2; z[2] = sum3; 857 z += 3; y += 3; 858 } 859 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 860 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 861 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 862 PLogFlops(18*a->nz); 863 return 0; 864 } 865 866 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 867 { 868 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 869 Scalar *xg,*yg,*zg; 870 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 871 register Scalar x1,x2,x3,x4; 872 int mbs=a->mbs,i,*idx,*ii; 873 int j,n,ierr; 874 875 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 876 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 877 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 878 879 idx = a->j; 880 v = a->a; 881 ii = a->i; 882 883 for ( i=0; i<mbs; i++ ) { 884 n = ii[1] - ii[0]; ii++; 885 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 886 for ( j=0; j<n; j++ ) { 887 xb = x + 4*(*idx++); 888 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 889 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 890 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 891 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 892 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 893 v += 16; 894 } 895 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 896 z += 4; y += 4; 897 } 898 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 899 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 900 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 901 PLogFlops(32*a->nz); 902 return 0; 903 } 904 905 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 906 { 907 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 908 Scalar *xg,*yg,*zg; 909 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 910 register Scalar x1,x2,x3,x4,x5; 911 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 912 913 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 914 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 915 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 916 917 idx = a->j; 918 v = a->a; 919 ii = a->i; 920 921 for ( i=0; i<mbs; i++ ) { 922 n = ii[1] - ii[0]; ii++; 923 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 924 for ( j=0; j<n; j++ ) { 925 xb = x + 5*(*idx++); 926 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 927 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 928 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 929 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 930 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 931 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 932 v += 25; 933 } 934 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 935 z += 5; y += 5; 936 } 937 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 938 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 939 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 940 PLogFlops(50*a->nz); 941 return 0; 942 } 943 944 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 945 { 946 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 947 Scalar *xg,*zg; 948 register Scalar *x,*z,*v,*xb; 949 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 950 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 951 952 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 953 954 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 955 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 956 957 idx = a->j; 958 v = a->a; 959 ii = a->i; 960 961 962 if (!a->mult_work) { 963 k = PetscMax(a->m,a->n); 964 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 965 } 966 work = a->mult_work; 967 for ( i=0; i<mbs; i++ ) { 968 n = ii[1] - ii[0]; ii++; 969 ncols = n*bs; 970 workt = work; 971 for ( j=0; j<n; j++ ) { 972 xb = x + bs*(*idx++); 973 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 974 workt += bs; 975 } 976 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 977 v += n*bs2; 978 z += bs; 979 } 980 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 981 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 982 PLogFlops(2*a->nz*bs2 ); 983 return 0; 984 } 985 986 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 987 { 988 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 989 Scalar *xg,*zg,*zb; 990 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 991 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 992 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 993 994 995 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 996 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 997 PetscMemzero(z,N*sizeof(Scalar)); 998 999 idx = a->j; 1000 v = a->a; 1001 ii = a->i; 1002 1003 switch (bs) { 1004 case 1: 1005 for ( i=0; i<mbs; i++ ) { 1006 n = ii[1] - ii[0]; ii++; 1007 xb = x + i; x1 = xb[0]; 1008 ib = idx + ai[i]; 1009 for ( j=0; j<n; j++ ) { 1010 rval = ib[j]; 1011 z[rval] += *v++ * x1; 1012 } 1013 } 1014 break; 1015 case 2: 1016 for ( i=0; i<mbs; i++ ) { 1017 n = ii[1] - ii[0]; ii++; 1018 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1019 ib = idx + ai[i]; 1020 for ( j=0; j<n; j++ ) { 1021 rval = ib[j]*2; 1022 z[rval++] += v[0]*x1 + v[1]*x2; 1023 z[rval++] += v[2]*x1 + v[3]*x2; 1024 v += 4; 1025 } 1026 } 1027 break; 1028 case 3: 1029 for ( i=0; i<mbs; i++ ) { 1030 n = ii[1] - ii[0]; ii++; 1031 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1032 ib = idx + ai[i]; 1033 for ( j=0; j<n; j++ ) { 1034 rval = ib[j]*3; 1035 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1036 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1037 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1038 v += 9; 1039 } 1040 } 1041 break; 1042 case 4: 1043 for ( i=0; i<mbs; i++ ) { 1044 n = ii[1] - ii[0]; ii++; 1045 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1046 ib = idx + ai[i]; 1047 for ( j=0; j<n; j++ ) { 1048 rval = ib[j]*4; 1049 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1050 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1051 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1052 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1053 v += 16; 1054 } 1055 } 1056 break; 1057 case 5: 1058 for ( i=0; i<mbs; i++ ) { 1059 n = ii[1] - ii[0]; ii++; 1060 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1061 x4 = xb[3]; x5 = xb[4]; 1062 ib = idx + ai[i]; 1063 for ( j=0; j<n; j++ ) { 1064 rval = ib[j]*5; 1065 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1066 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1067 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1068 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1069 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1070 v += 25; 1071 } 1072 } 1073 break; 1074 /* block sizes larger then 5 by 5 are handled by BLAS */ 1075 default: { 1076 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1077 if (!a->mult_work) { 1078 k = PetscMax(a->m,a->n); 1079 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1080 CHKPTRQ(a->mult_work); 1081 } 1082 work = a->mult_work; 1083 for ( i=0; i<mbs; i++ ) { 1084 n = ii[1] - ii[0]; ii++; 1085 ncols = n*bs; 1086 PetscMemzero(work,ncols*sizeof(Scalar)); 1087 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1088 v += n*bs2; 1089 x += bs; 1090 workt = work; 1091 for ( j=0; j<n; j++ ) { 1092 zb = z + bs*(*idx++); 1093 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1094 workt += bs; 1095 } 1096 } 1097 } 1098 } 1099 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1100 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1101 return 0; 1102 } 1103 1104 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 1105 { 1106 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1107 if (nz) *nz = a->bs2*a->nz; 1108 if (nza) *nza = a->maxnz; 1109 if (mem) *mem = (int)A->mem; 1110 return 0; 1111 } 1112 1113 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1114 { 1115 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1116 1117 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 1118 1119 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1120 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1121 (a->nz != b->nz)) { 1122 *flg = PETSC_FALSE; return 0; 1123 } 1124 1125 /* if the a->i are the same */ 1126 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1127 *flg = PETSC_FALSE; return 0; 1128 } 1129 1130 /* if a->j are the same */ 1131 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1132 *flg = PETSC_FALSE; return 0; 1133 } 1134 1135 /* if a->a are the same */ 1136 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1137 *flg = PETSC_FALSE; return 0; 1138 } 1139 *flg = PETSC_TRUE; 1140 return 0; 1141 1142 } 1143 1144 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1145 { 1146 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1147 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1148 Scalar *x, zero = 0.0,*aa,*aa_j; 1149 1150 bs = a->bs; 1151 aa = a->a; 1152 ai = a->i; 1153 aj = a->j; 1154 ambs = a->mbs; 1155 bs2 = a->bs2; 1156 1157 VecSet(&zero,v); 1158 VecGetArray(v,&x); VecGetLocalSize(v,&n); 1159 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 1160 for ( i=0; i<ambs; i++ ) { 1161 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1162 if (aj[j] == i) { 1163 row = i*bs; 1164 aa_j = aa+j*bs2; 1165 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1166 break; 1167 } 1168 } 1169 } 1170 return 0; 1171 } 1172 1173 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1174 { 1175 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1176 Scalar *l,*r,x,*v,*aa,*li,*ri; 1177 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1178 1179 ai = a->i; 1180 aj = a->j; 1181 aa = a->a; 1182 m = a->m; 1183 n = a->n; 1184 bs = a->bs; 1185 mbs = a->mbs; 1186 bs2 = a->bs2; 1187 if (ll) { 1188 VecGetArray(ll,&l); VecGetSize(ll,&lm); 1189 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 1190 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1191 M = ai[i+1] - ai[i]; 1192 li = l + i*bs; 1193 v = aa + bs2*ai[i]; 1194 for ( j=0; j<M; j++ ) { /* for each block */ 1195 for ( k=0; k<bs2; k++ ) { 1196 (*v++) *= li[k%bs]; 1197 } 1198 } 1199 } 1200 } 1201 1202 if (rr) { 1203 VecGetArray(rr,&r); VecGetSize(rr,&rn); 1204 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 1205 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1206 M = ai[i+1] - ai[i]; 1207 v = aa + bs2*ai[i]; 1208 for ( j=0; j<M; j++ ) { /* for each block */ 1209 ri = r + bs*aj[ai[i]+j]; 1210 for ( k=0; k<bs; k++ ) { 1211 x = ri[k]; 1212 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1213 } 1214 } 1215 } 1216 } 1217 return 0; 1218 } 1219 1220 1221 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1222 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1223 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1224 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1225 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1226 1227 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1228 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1229 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1230 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1231 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1232 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1233 1234 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1235 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1236 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1237 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1238 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1239 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1240 1241 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1242 { 1243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1244 Scalar *v = a->a; 1245 double sum = 0.0; 1246 int i,nz=a->nz,bs2=a->bs2; 1247 1248 if (type == NORM_FROBENIUS) { 1249 for (i=0; i< bs2*nz; i++ ) { 1250 #if defined(PETSC_COMPLEX) 1251 sum += real(conj(*v)*(*v)); v++; 1252 #else 1253 sum += (*v)*(*v); v++; 1254 #endif 1255 } 1256 *norm = sqrt(sum); 1257 } 1258 else { 1259 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 1260 } 1261 return 0; 1262 } 1263 1264 /* 1265 note: This can only work for identity for row and col. It would 1266 be good to check this and otherwise generate an error. 1267 */ 1268 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1269 { 1270 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1271 Mat outA; 1272 int ierr; 1273 1274 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 1275 1276 outA = inA; 1277 inA->factor = FACTOR_LU; 1278 a->row = row; 1279 a->col = col; 1280 1281 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1282 1283 if (!a->diag) { 1284 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1285 } 1286 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1287 return 0; 1288 } 1289 1290 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1291 { 1292 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1293 int one = 1, totalnz = a->bs2*a->nz; 1294 BLscal_( &totalnz, alpha, a->a, &one ); 1295 PLogFlops(totalnz); 1296 return 0; 1297 } 1298 1299 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1300 { 1301 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1302 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1303 int *ai = a->i, *ailen = a->ilen; 1304 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1305 Scalar *ap, *aa = a->a, zero = 0.0; 1306 1307 for ( k=0; k<m; k++ ) { /* loop over rows */ 1308 row = im[k]; brow = row/bs; 1309 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 1310 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1311 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1312 nrow = ailen[brow]; 1313 for ( l=0; l<n; l++ ) { /* loop over columns */ 1314 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 1315 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1316 col = in[l] ; 1317 bcol = col/bs; 1318 cidx = col%bs; 1319 ridx = row%bs; 1320 high = nrow; 1321 low = 0; /* assume unsorted */ 1322 while (high-low > 5) { 1323 t = (low+high)/2; 1324 if (rp[t] > bcol) high = t; 1325 else low = t; 1326 } 1327 for ( i=low; i<high; i++ ) { 1328 if (rp[i] > bcol) break; 1329 if (rp[i] == bcol) { 1330 *v++ = ap[bs2*i+bs*cidx+ridx]; 1331 goto finished; 1332 } 1333 } 1334 *v++ = zero; 1335 finished:; 1336 } 1337 } 1338 return 0; 1339 } 1340 1341 /* -------------------------------------------------------------------*/ 1342 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1343 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1344 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1345 MatMultTrans_SeqBAIJ,0, 1346 MatSolve_SeqBAIJ_N,0, 1347 0,0, 1348 MatLUFactor_SeqBAIJ,0, 1349 0, 1350 MatTranspose_SeqBAIJ, 1351 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1352 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1353 0,MatAssemblyEnd_SeqBAIJ, 1354 0, 1355 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 1356 MatGetReordering_SeqBAIJ, 1357 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1358 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1359 MatILUFactorSymbolic_SeqBAIJ,0, 1360 0,0,/* MatConvert_SeqBAIJ */ 0, 1361 MatGetSubMatrix_SeqBAIJ,0, 1362 MatConvertSameType_SeqBAIJ,0,0, 1363 MatILUFactor_SeqBAIJ,0,0, 1364 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 1365 MatGetValues_SeqBAIJ,0, 1366 0,MatScale_SeqBAIJ, 1367 0}; 1368 1369 /*@C 1370 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1371 compressed row) format. For good matrix assembly performance the 1372 user should preallocate the matrix storage by setting the parameter nz 1373 (or nzz). By setting these parameters accurately, performance can be 1374 increased by more than a factor of 50. 1375 1376 Input Parameters: 1377 . comm - MPI communicator, set to MPI_COMM_SELF 1378 . bs - size of block 1379 . m - number of rows 1380 . n - number of columns 1381 . nz - number of block nonzeros per block row (same for all rows) 1382 . nzz - number of block nonzeros per block row or PETSC_NULL 1383 (possibly different for each row) 1384 1385 Output Parameter: 1386 . A - the matrix 1387 1388 Notes: 1389 The block AIJ format is fully compatible with standard Fortran 77 1390 storage. That is, the stored row and column indices can begin at 1391 either one (as in Fortran) or zero. See the users' manual for details. 1392 1393 Specify the preallocated storage with either nz or nnz (not both). 1394 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1395 allocation. For additional details, see the users manual chapter on 1396 matrices and the file $(PETSC_DIR)/Performance. 1397 1398 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1399 @*/ 1400 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1401 { 1402 Mat B; 1403 Mat_SeqBAIJ *b; 1404 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 1405 1406 if (mbs*bs!=m || nbs*bs!=n) 1407 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 1408 1409 *A = 0; 1410 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1411 PLogObjectCreate(B); 1412 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1413 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1414 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1415 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 1416 if (!flg) { 1417 switch (bs) { 1418 case 1: 1419 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1420 B->ops.solve = MatSolve_SeqBAIJ_1; 1421 B->ops.mult = MatMult_SeqBAIJ_1; 1422 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 1423 break; 1424 case 2: 1425 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1426 B->ops.solve = MatSolve_SeqBAIJ_2; 1427 B->ops.mult = MatMult_SeqBAIJ_2; 1428 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 1429 break; 1430 case 3: 1431 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1432 B->ops.solve = MatSolve_SeqBAIJ_3; 1433 B->ops.mult = MatMult_SeqBAIJ_3; 1434 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 1435 break; 1436 case 4: 1437 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1438 B->ops.solve = MatSolve_SeqBAIJ_4; 1439 B->ops.mult = MatMult_SeqBAIJ_4; 1440 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 1441 break; 1442 case 5: 1443 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1444 B->ops.solve = MatSolve_SeqBAIJ_5; 1445 B->ops.mult = MatMult_SeqBAIJ_5; 1446 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 1447 break; 1448 } 1449 } 1450 B->destroy = MatDestroy_SeqBAIJ; 1451 B->view = MatView_SeqBAIJ; 1452 B->factor = 0; 1453 B->lupivotthreshold = 1.0; 1454 b->row = 0; 1455 b->col = 0; 1456 b->reallocs = 0; 1457 1458 b->m = m; B->m = m; B->M = m; 1459 b->n = n; B->n = n; B->N = n; 1460 b->mbs = mbs; 1461 b->nbs = nbs; 1462 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1463 if (nnz == PETSC_NULL) { 1464 if (nz == PETSC_DEFAULT) nz = 5; 1465 else if (nz <= 0) nz = 1; 1466 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1467 nz = nz*mbs; 1468 } 1469 else { 1470 nz = 0; 1471 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1472 } 1473 1474 /* allocate the matrix space */ 1475 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1476 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1477 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1478 b->j = (int *) (b->a + nz*bs2); 1479 PetscMemzero(b->j,nz*sizeof(int)); 1480 b->i = b->j + nz; 1481 b->singlemalloc = 1; 1482 1483 b->i[0] = 0; 1484 for (i=1; i<mbs+1; i++) { 1485 b->i[i] = b->i[i-1] + b->imax[i-1]; 1486 } 1487 1488 /* b->ilen will count nonzeros in each block row so far. */ 1489 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1490 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1491 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1492 1493 b->bs = bs; 1494 b->bs2 = bs2; 1495 b->mbs = mbs; 1496 b->nz = 0; 1497 b->maxnz = nz*bs2; 1498 b->sorted = 0; 1499 b->roworiented = 1; 1500 b->nonew = 0; 1501 b->diag = 0; 1502 b->solve_work = 0; 1503 b->mult_work = 0; 1504 b->spptr = 0; 1505 *A = B; 1506 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1507 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1508 return 0; 1509 } 1510 1511 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1512 { 1513 Mat C; 1514 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1515 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1516 1517 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1518 1519 *B = 0; 1520 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1521 PLogObjectCreate(C); 1522 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1523 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1524 C->destroy = MatDestroy_SeqBAIJ; 1525 C->view = MatView_SeqBAIJ; 1526 C->factor = A->factor; 1527 c->row = 0; 1528 c->col = 0; 1529 C->assembled = PETSC_TRUE; 1530 1531 c->m = C->m = a->m; 1532 c->n = C->n = a->n; 1533 C->M = a->m; 1534 C->N = a->n; 1535 1536 c->bs = a->bs; 1537 c->bs2 = a->bs2; 1538 c->mbs = a->mbs; 1539 c->nbs = a->nbs; 1540 1541 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1542 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1543 for ( i=0; i<mbs; i++ ) { 1544 c->imax[i] = a->imax[i]; 1545 c->ilen[i] = a->ilen[i]; 1546 } 1547 1548 /* allocate the matrix space */ 1549 c->singlemalloc = 1; 1550 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1551 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1552 c->j = (int *) (c->a + nz*bs2); 1553 c->i = c->j + nz; 1554 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1555 if (mbs > 0) { 1556 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1557 if (cpvalues == COPY_VALUES) { 1558 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1559 } 1560 } 1561 1562 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1563 c->sorted = a->sorted; 1564 c->roworiented = a->roworiented; 1565 c->nonew = a->nonew; 1566 1567 if (a->diag) { 1568 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1569 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1570 for ( i=0; i<mbs; i++ ) { 1571 c->diag[i] = a->diag[i]; 1572 } 1573 } 1574 else c->diag = 0; 1575 c->nz = a->nz; 1576 c->maxnz = a->maxnz; 1577 c->solve_work = 0; 1578 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1579 c->mult_work = 0; 1580 *B = C; 1581 return 0; 1582 } 1583 1584 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1585 { 1586 Mat_SeqBAIJ *a; 1587 Mat B; 1588 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1589 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1590 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1591 int *masked, nmask,tmp,bs2,ishift; 1592 Scalar *aa; 1593 MPI_Comm comm = ((PetscObject) viewer)->comm; 1594 1595 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1596 bs2 = bs*bs; 1597 1598 MPI_Comm_size(comm,&size); 1599 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1600 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1601 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1602 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1603 M = header[1]; N = header[2]; nz = header[3]; 1604 1605 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1606 1607 /* 1608 This code adds extra rows to make sure the number of rows is 1609 divisible by the blocksize 1610 */ 1611 mbs = M/bs; 1612 extra_rows = bs - M + bs*(mbs); 1613 if (extra_rows == bs) extra_rows = 0; 1614 else mbs++; 1615 if (extra_rows) { 1616 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize"); 1617 } 1618 1619 /* read in row lengths */ 1620 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1621 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1622 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1623 1624 /* read in column indices */ 1625 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1626 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1627 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1628 1629 /* loop over row lengths determining block row lengths */ 1630 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1631 PetscMemzero(browlengths,mbs*sizeof(int)); 1632 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1633 PetscMemzero(mask,mbs*sizeof(int)); 1634 masked = mask + mbs; 1635 rowcount = 0; nzcount = 0; 1636 for ( i=0; i<mbs; i++ ) { 1637 nmask = 0; 1638 for ( j=0; j<bs; j++ ) { 1639 kmax = rowlengths[rowcount]; 1640 for ( k=0; k<kmax; k++ ) { 1641 tmp = jj[nzcount++]/bs; 1642 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1643 } 1644 rowcount++; 1645 } 1646 browlengths[i] += nmask; 1647 /* zero out the mask elements we set */ 1648 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1649 } 1650 1651 /* create our matrix */ 1652 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1653 CHKERRQ(ierr); 1654 B = *A; 1655 a = (Mat_SeqBAIJ *) B->data; 1656 1657 /* set matrix "i" values */ 1658 a->i[0] = 0; 1659 for ( i=1; i<= mbs; i++ ) { 1660 a->i[i] = a->i[i-1] + browlengths[i-1]; 1661 a->ilen[i-1] = browlengths[i-1]; 1662 } 1663 a->nz = 0; 1664 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1665 1666 /* read in nonzero values */ 1667 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1668 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 1669 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1670 1671 /* set "a" and "j" values into matrix */ 1672 nzcount = 0; jcount = 0; 1673 for ( i=0; i<mbs; i++ ) { 1674 nzcountb = nzcount; 1675 nmask = 0; 1676 for ( j=0; j<bs; j++ ) { 1677 kmax = rowlengths[i*bs+j]; 1678 for ( k=0; k<kmax; k++ ) { 1679 tmp = jj[nzcount++]/bs; 1680 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1681 } 1682 rowcount++; 1683 } 1684 /* sort the masked values */ 1685 PetscSortInt(nmask,masked); 1686 1687 /* set "j" values into matrix */ 1688 maskcount = 1; 1689 for ( j=0; j<nmask; j++ ) { 1690 a->j[jcount++] = masked[j]; 1691 mask[masked[j]] = maskcount++; 1692 } 1693 /* set "a" values into matrix */ 1694 ishift = bs2*a->i[i]; 1695 for ( j=0; j<bs; j++ ) { 1696 kmax = rowlengths[i*bs+j]; 1697 for ( k=0; k<kmax; k++ ) { 1698 tmp = jj[nzcountb]/bs ; 1699 block = mask[tmp] - 1; 1700 point = jj[nzcountb] - bs*tmp; 1701 idx = ishift + bs2*block + j + bs*point; 1702 a->a[idx] = aa[nzcountb++]; 1703 } 1704 } 1705 /* zero out the mask elements we set */ 1706 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1707 } 1708 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1709 1710 PetscFree(rowlengths); 1711 PetscFree(browlengths); 1712 PetscFree(aa); 1713 PetscFree(jj); 1714 PetscFree(mask); 1715 1716 B->assembled = PETSC_TRUE; 1717 1718 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1719 if (flg) { 1720 Viewer tviewer; 1721 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1722 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 1723 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1724 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1725 } 1726 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1727 if (flg) { 1728 Viewer tviewer; 1729 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1730 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 1731 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1732 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1733 } 1734 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1735 if (flg) { 1736 Viewer tviewer; 1737 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1738 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1739 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1740 } 1741 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1742 if (flg) { 1743 Viewer tviewer; 1744 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1745 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 1746 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1747 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1748 } 1749 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1750 if (flg) { 1751 Viewer tviewer; 1752 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 1753 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 1754 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 1755 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1756 } 1757 return 0; 1758 } 1759 1760 1761 1762