1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.50 1996/06/24 20:57:39 balay Exp balay $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "src/vec/vecimpl.h" 12 #include "src/inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatOrdering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = 0; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr); 45 ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 46 PetscFree(ia); PetscFree(ja); 47 } else { 48 if (ishift == oshift) { 49 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 50 } 51 else if (ishift == -1) { 52 /* temporarily subtract 1 from i and j indices */ 53 int nz = a->i[n] - 1; 54 for ( i=0; i<nz; i++ ) a->j[i]--; 55 for ( i=0; i<n+1; i++ ) a->i[i]--; 56 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 57 for ( i=0; i<nz; i++ ) a->j[i]++; 58 for ( i=0; i<n+1; i++ ) a->i[i]++; 59 } else { 60 /* temporarily add 1 to i and j indices */ 61 int nz = a->i[n] - 1; 62 for ( i=0; i<nz; i++ ) a->j[i]++; 63 for ( i=0; i<n+1; i++ ) a->i[i]++; 64 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 65 for ( i=0; i<nz; i++ ) a->j[i]--; 66 for ( i=0; i<n+1; i++ ) a->i[i]--; 67 } 68 } 69 return 0; 70 } 71 72 /* 73 Adds diagonal pointers to sparse matrix structure. 74 */ 75 76 int MatMarkDiag_SeqBAIJ(Mat A) 77 { 78 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 79 int i,j, *diag, m = a->mbs; 80 81 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 82 PLogObjectMemory(A,(m+1)*sizeof(int)); 83 for ( i=0; i<m; i++ ) { 84 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 85 if (a->j[j] == i) { 86 diag[i] = j; 87 break; 88 } 89 } 90 } 91 a->diag = diag; 92 return 0; 93 } 94 95 #include "draw.h" 96 #include "pinclude/pviewer.h" 97 #include "sys.h" 98 99 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 100 { 101 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 102 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 103 Scalar *aa; 104 105 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 106 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 107 col_lens[0] = MAT_COOKIE; 108 col_lens[1] = a->m; 109 col_lens[2] = a->n; 110 col_lens[3] = a->nz*bs2; 111 112 /* store lengths of each row and write (including header) to file */ 113 count = 0; 114 for ( i=0; i<a->mbs; i++ ) { 115 for ( j=0; j<bs; j++ ) { 116 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 117 } 118 } 119 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 120 PetscFree(col_lens); 121 122 /* store column indices (zero start index) */ 123 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 124 count = 0; 125 for ( i=0; i<a->mbs; i++ ) { 126 for ( j=0; j<bs; j++ ) { 127 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 128 for ( l=0; l<bs; l++ ) { 129 jj[count++] = bs*a->j[k] + l; 130 } 131 } 132 } 133 } 134 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 135 PetscFree(jj); 136 137 /* store nonzero values */ 138 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 139 count = 0; 140 for ( i=0; i<a->mbs; i++ ) { 141 for ( j=0; j<bs; j++ ) { 142 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 143 for ( l=0; l<bs; l++ ) { 144 aa[count++] = a->a[bs2*k + l*bs + j]; 145 } 146 } 147 } 148 } 149 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 150 PetscFree(aa); 151 return 0; 152 } 153 154 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 155 { 156 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 157 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 158 FILE *fd; 159 char *outputname; 160 161 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 162 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 163 ierr = ViewerGetFormat(viewer,&format); 164 if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) { 165 fprintf(fd," block size is %d\n",bs); 166 } 167 else if (format == ASCII_FORMAT_MATLAB) { 168 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 169 } 170 else if (format == ASCII_FORMAT_COMMON) { 171 for ( i=0; i<a->mbs; i++ ) { 172 for ( j=0; j<bs; j++ ) { 173 fprintf(fd,"row %d:",i*bs+j); 174 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 175 for ( l=0; l<bs; l++ ) { 176 #if defined(PETSC_COMPLEX) 177 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 178 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 179 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 180 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 181 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 182 #else 183 if (a->a[bs2*k + l*bs + j] != 0.0) 184 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 185 #endif 186 } 187 } 188 fprintf(fd,"\n"); 189 } 190 } 191 } 192 else { 193 for ( i=0; i<a->mbs; i++ ) { 194 for ( j=0; j<bs; j++ ) { 195 fprintf(fd,"row %d:",i*bs+j); 196 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 197 for ( l=0; l<bs; l++ ) { 198 #if defined(PETSC_COMPLEX) 199 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 200 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 201 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 202 } 203 else { 204 fprintf(fd," %d %g",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 205 } 206 #else 207 fprintf(fd," %d %g",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 208 #endif 209 } 210 } 211 fprintf(fd,"\n"); 212 } 213 } 214 } 215 fflush(fd); 216 return 0; 217 } 218 219 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 220 { 221 Mat A = (Mat) obj; 222 ViewerType vtype; 223 int ierr; 224 225 if (!viewer) { 226 viewer = STDOUT_VIEWER_SELF; 227 } 228 229 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 230 if (vtype == MATLAB_VIEWER) { 231 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 232 } 233 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 234 return MatView_SeqBAIJ_ASCII(A,viewer); 235 } 236 else if (vtype == BINARY_FILE_VIEWER) { 237 return MatView_SeqBAIJ_Binary(A,viewer); 238 } 239 else if (vtype == DRAW_VIEWER) { 240 SETERRQ(1,"MatView_SeqBAIJ:Draw viewer not supported"); 241 } 242 return 0; 243 } 244 245 #define CHUNKSIZE 10 246 247 /* This version has row oriented v */ 248 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 249 { 250 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 251 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 252 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 253 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 254 int ridx,cidx,bs2=a->bs2; 255 Scalar *ap,value,*aa=a->a,*bap; 256 257 for ( k=0; k<m; k++ ) { /* loop over added rows */ 258 row = im[k]; brow = row/bs; 259 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 260 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 261 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 262 rmax = imax[brow]; nrow = ailen[brow]; 263 low = 0; 264 for ( l=0; l<n; l++ ) { /* loop over added columns */ 265 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 266 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 267 col = in[l]; bcol = col/bs; 268 ridx = row % bs; cidx = col % bs; 269 if (roworiented) { 270 value = *v++; 271 } 272 else { 273 value = v[k + l*m]; 274 } 275 if (!sorted) low = 0; high = nrow; 276 while (high-low > 5) { 277 t = (low+high)/2; 278 if (rp[t] > bcol) high = t; 279 else low = t; 280 } 281 for ( i=low; i<high; i++ ) { 282 if (rp[i] > bcol) break; 283 if (rp[i] == bcol) { 284 bap = ap + bs2*i + bs*cidx + ridx; 285 if (is == ADD_VALUES) *bap += value; 286 else *bap = value; 287 goto noinsert; 288 } 289 } 290 if (nonew) goto noinsert; 291 if (nrow >= rmax) { 292 /* there is no extra room in row, therefore enlarge */ 293 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 294 Scalar *new_a; 295 296 /* malloc new storage space */ 297 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 298 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 299 new_j = (int *) (new_a + bs2*new_nz); 300 new_i = new_j + new_nz; 301 302 /* copy over old data into new slots */ 303 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 304 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 305 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 306 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 307 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 308 len*sizeof(int)); 309 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 310 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 311 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 312 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 313 /* free up old matrix storage */ 314 PetscFree(a->a); 315 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 316 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 317 a->singlemalloc = 1; 318 319 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 320 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 321 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 322 a->maxnz += bs2*CHUNKSIZE; 323 a->reallocs++; 324 a->nz++; 325 } 326 N = nrow++ - 1; 327 /* shift up all the later entries in this row */ 328 for ( ii=N; ii>=i; ii-- ) { 329 rp[ii+1] = rp[ii]; 330 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 331 } 332 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 333 rp[i] = bcol; 334 ap[bs2*i + bs*cidx + ridx] = value; 335 noinsert:; 336 low = i; 337 } 338 ailen[brow] = nrow; 339 } 340 return 0; 341 } 342 343 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 344 { 345 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 346 *m = a->m; *n = a->n; 347 return 0; 348 } 349 350 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 351 { 352 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 353 *m = 0; *n = a->m; 354 return 0; 355 } 356 357 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 358 { 359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 360 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 361 Scalar *aa,*v_i,*aa_i; 362 363 bs = a->bs; 364 ai = a->i; 365 aj = a->j; 366 aa = a->a; 367 bs2 = a->bs2; 368 369 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 370 371 bn = row/bs; /* Block number */ 372 bp = row % bs; /* Block Position */ 373 M = ai[bn+1] - ai[bn]; 374 *nz = bs*M; 375 376 if (v) { 377 *v = 0; 378 if (*nz) { 379 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 380 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 381 v_i = *v + i*bs; 382 aa_i = aa + bs2*(ai[bn] + i); 383 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 384 } 385 } 386 } 387 388 if (idx) { 389 *idx = 0; 390 if (*nz) { 391 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 392 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 393 idx_i = *idx + i*bs; 394 itmp = bs*aj[ai[bn] + i]; 395 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 396 } 397 } 398 } 399 return 0; 400 } 401 402 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 403 { 404 if (idx) {if (*idx) PetscFree(*idx);} 405 if (v) {if (*v) PetscFree(*v);} 406 return 0; 407 } 408 409 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 410 { 411 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 412 Mat C; 413 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 414 int *rows,*cols,bs2=a->bs2; 415 Scalar *array=a->a; 416 417 if (B==PETSC_NULL && mbs!=nbs) 418 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 419 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 420 PetscMemzero(col,(1+nbs)*sizeof(int)); 421 422 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 423 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 424 PetscFree(col); 425 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 426 cols = rows + bs; 427 for ( i=0; i<mbs; i++ ) { 428 cols[0] = i*bs; 429 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 430 len = ai[i+1] - ai[i]; 431 for ( j=0; j<len; j++ ) { 432 rows[0] = (*aj++)*bs; 433 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 434 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 435 array += bs2; 436 } 437 } 438 PetscFree(rows); 439 440 ierr = MatAssemblyBegin(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 441 ierr = MatAssemblyEnd(C,FINAL_ASSEMBLY); CHKERRQ(ierr); 442 443 if (B != PETSC_NULL) { 444 *B = C; 445 } else { 446 /* This isn't really an in-place transpose */ 447 PetscFree(a->a); 448 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 449 if (a->diag) PetscFree(a->diag); 450 if (a->ilen) PetscFree(a->ilen); 451 if (a->imax) PetscFree(a->imax); 452 if (a->solve_work) PetscFree(a->solve_work); 453 PetscFree(a); 454 PetscMemcpy(A,C,sizeof(struct _Mat)); 455 PetscHeaderDestroy(C); 456 } 457 return 0; 458 } 459 460 461 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 462 { 463 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 464 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 465 int m = a->m,*ip, N, *ailen = a->ilen; 466 int mbs = a->mbs, bs2 = a->bs2; 467 Scalar *aa = a->a, *ap; 468 469 if (mode == FLUSH_ASSEMBLY) return 0; 470 471 for ( i=1; i<mbs; i++ ) { 472 /* move each row back by the amount of empty slots (fshift) before it*/ 473 fshift += imax[i-1] - ailen[i-1]; 474 if (fshift) { 475 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 476 N = ailen[i]; 477 for ( j=0; j<N; j++ ) { 478 ip[j-fshift] = ip[j]; 479 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 480 } 481 } 482 ai[i] = ai[i-1] + ailen[i-1]; 483 } 484 if (mbs) { 485 fshift += imax[mbs-1] - ailen[mbs-1]; 486 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 487 } 488 /* reset ilen and imax for each row */ 489 for ( i=0; i<mbs; i++ ) { 490 ailen[i] = imax[i] = ai[i+1] - ai[i]; 491 } 492 a->nz = ai[mbs]; 493 494 /* diagonals may have moved, so kill the diagonal pointers */ 495 if (fshift && a->diag) { 496 PetscFree(a->diag); 497 PLogObjectMemory(A,-(m+1)*sizeof(int)); 498 a->diag = 0; 499 } 500 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space (blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs); 501 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n", 502 a->reallocs); 503 return 0; 504 } 505 506 static int MatZeroEntries_SeqBAIJ(Mat A) 507 { 508 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 509 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 510 return 0; 511 } 512 513 int MatDestroy_SeqBAIJ(PetscObject obj) 514 { 515 Mat A = (Mat) obj; 516 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 517 518 #if defined(PETSC_LOG) 519 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 520 #endif 521 PetscFree(a->a); 522 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 523 if (a->diag) PetscFree(a->diag); 524 if (a->ilen) PetscFree(a->ilen); 525 if (a->imax) PetscFree(a->imax); 526 if (a->solve_work) PetscFree(a->solve_work); 527 if (a->mult_work) PetscFree(a->mult_work); 528 PetscFree(a); 529 PLogObjectDestroy(A); 530 PetscHeaderDestroy(A); 531 return 0; 532 } 533 534 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 535 { 536 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 537 if (op == ROW_ORIENTED) a->roworiented = 1; 538 else if (op == COLUMN_ORIENTED) a->roworiented = 0; 539 else if (op == COLUMNS_SORTED) a->sorted = 1; 540 else if (op == NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 541 else if (op == YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 542 else if (op == ROWS_SORTED || 543 op == SYMMETRIC_MATRIX || 544 op == STRUCTURALLY_SYMMETRIC_MATRIX || 545 op == YES_NEW_DIAGONALS) 546 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 547 else if (op == NO_NEW_DIAGONALS) 548 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:NO_NEW_DIAGONALS");} 549 else 550 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 551 return 0; 552 } 553 554 555 /* -------------------------------------------------------*/ 556 /* Should check that shapes of vectors and matrices match */ 557 /* -------------------------------------------------------*/ 558 #include "pinclude/plapack.h" 559 560 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 561 { 562 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 563 Scalar *xg,*zg; 564 register Scalar *x,*z,*v,sum; 565 int mbs=a->mbs,i,*idx,*ii,n,ierr; 566 567 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 568 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 569 570 idx = a->j; 571 v = a->a; 572 ii = a->i; 573 574 for ( i=0; i<mbs; i++ ) { 575 n = ii[1] - ii[0]; ii++; 576 sum = 0.0; 577 while (n--) sum += *v++ * x[*idx++]; 578 z[i] = sum; 579 } 580 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 581 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 582 PLogFlops(2*a->nz - a->m); 583 return 0; 584 } 585 586 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 587 { 588 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 589 Scalar *xg,*zg; 590 register Scalar *x,*z,*v,*xb,sum1,sum2; 591 register Scalar x1,x2; 592 int mbs=a->mbs,i,*idx,*ii; 593 int j,n,ierr; 594 595 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 596 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 597 598 idx = a->j; 599 v = a->a; 600 ii = a->i; 601 602 for ( i=0; i<mbs; i++ ) { 603 n = ii[1] - ii[0]; ii++; 604 sum1 = 0.0; sum2 = 0.0; 605 for ( j=0; j<n; j++ ) { 606 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 607 sum1 += v[0]*x1 + v[2]*x2; 608 sum2 += v[1]*x1 + v[3]*x2; 609 v += 4; 610 } 611 z[0] = sum1; z[1] = sum2; 612 z += 2; 613 } 614 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 615 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 616 PLogFlops(4*a->nz - a->m); 617 return 0; 618 } 619 620 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 621 { 622 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 623 Scalar *xg,*zg; 624 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 625 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 626 627 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 628 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 629 630 idx = a->j; 631 v = a->a; 632 ii = a->i; 633 634 for ( i=0; i<mbs; i++ ) { 635 n = ii[1] - ii[0]; ii++; 636 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 637 for ( j=0; j<n; j++ ) { 638 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 639 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 640 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 641 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 642 v += 9; 643 } 644 z[0] = sum1; z[1] = sum2; z[2] = sum3; 645 z += 3; 646 } 647 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 648 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 649 PLogFlops(18*a->nz - a->m); 650 return 0; 651 } 652 653 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 654 { 655 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 656 Scalar *xg,*zg; 657 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 658 register Scalar x1,x2,x3,x4; 659 int mbs=a->mbs,i,*idx,*ii; 660 int j,n,ierr; 661 662 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 663 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 664 665 idx = a->j; 666 v = a->a; 667 ii = a->i; 668 669 for ( i=0; i<mbs; i++ ) { 670 n = ii[1] - ii[0]; ii++; 671 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 672 for ( j=0; j<n; j++ ) { 673 xb = x + 4*(*idx++); 674 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 675 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 676 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 677 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 678 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 679 v += 16; 680 } 681 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 682 z += 4; 683 } 684 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 685 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 686 PLogFlops(32*a->nz - a->m); 687 return 0; 688 } 689 690 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 691 { 692 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 693 Scalar *xg,*zg; 694 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 695 register Scalar x1,x2,x3,x4,x5; 696 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 697 698 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 699 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 700 701 idx = a->j; 702 v = a->a; 703 ii = a->i; 704 705 for ( i=0; i<mbs; i++ ) { 706 n = ii[1] - ii[0]; ii++; 707 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 708 for ( j=0; j<n; j++ ) { 709 xb = x + 5*(*idx++); 710 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 711 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 712 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 713 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 714 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 715 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 716 v += 25; 717 } 718 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 719 z += 5; 720 } 721 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 722 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 723 PLogFlops(50*a->nz - a->m); 724 return 0; 725 } 726 727 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 728 { 729 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 730 Scalar *xg,*zg; 731 register Scalar *x,*z,*v,*xb; 732 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 733 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt, _DZero = 0.0; 734 735 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 736 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 737 738 idx = a->j; 739 v = a->a; 740 ii = a->i; 741 742 743 if (!a->mult_work) { 744 k = PetscMax(a->m,a->n); 745 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 746 } 747 work = a->mult_work; 748 for ( i=0; i<mbs; i++ ) { 749 n = ii[1] - ii[0]; ii++; 750 ncols = n*bs; 751 workt = work; 752 for ( j=0; j<n; j++ ) { 753 xb = x + bs*(*idx++); 754 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 755 workt += bs; 756 } 757 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 758 v += n*bs2; 759 z += bs; 760 } 761 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 762 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 763 PLogFlops(2*a->nz*bs2 - a->m); 764 return 0; 765 } 766 767 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 768 { 769 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 770 Scalar *xg,*yg,*zg; 771 register Scalar *x,*y,*z,*v,sum; 772 int mbs=a->mbs,i,*idx,*ii,n,ierr; 773 774 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 775 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 776 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 777 778 idx = a->j; 779 v = a->a; 780 ii = a->i; 781 782 for ( i=0; i<mbs; i++ ) { 783 n = ii[1] - ii[0]; ii++; 784 sum = y[i]; 785 while (n--) sum += *v++ * x[*idx++]; 786 z[i] = sum; 787 } 788 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 789 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 790 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 791 PLogFlops(2*a->nz); 792 return 0; 793 } 794 795 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 796 { 797 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 798 Scalar *xg,*yg,*zg; 799 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 800 register Scalar x1,x2; 801 int mbs=a->mbs,i,*idx,*ii; 802 int j,n,ierr; 803 804 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 805 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 806 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 807 808 idx = a->j; 809 v = a->a; 810 ii = a->i; 811 812 for ( i=0; i<mbs; i++ ) { 813 n = ii[1] - ii[0]; ii++; 814 sum1 = y[0]; sum2 = y[1]; 815 for ( j=0; j<n; j++ ) { 816 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 817 sum1 += v[0]*x1 + v[2]*x2; 818 sum2 += v[1]*x1 + v[3]*x2; 819 v += 4; 820 } 821 z[0] = sum1; z[1] = sum2; 822 z += 2; y += 2; 823 } 824 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 825 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 826 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 827 PLogFlops(4*a->nz); 828 return 0; 829 } 830 831 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 832 { 833 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 834 Scalar *xg,*yg,*zg; 835 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 836 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 837 838 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 839 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 840 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 841 842 idx = a->j; 843 v = a->a; 844 ii = a->i; 845 846 for ( i=0; i<mbs; i++ ) { 847 n = ii[1] - ii[0]; ii++; 848 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 849 for ( j=0; j<n; j++ ) { 850 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 851 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 852 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 853 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 854 v += 9; 855 } 856 z[0] = sum1; z[1] = sum2; z[2] = sum3; 857 z += 3; y += 3; 858 } 859 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 860 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 861 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 862 PLogFlops(18*a->nz); 863 return 0; 864 } 865 866 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 867 { 868 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 869 Scalar *xg,*yg,*zg; 870 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 871 register Scalar x1,x2,x3,x4; 872 int mbs=a->mbs,i,*idx,*ii; 873 int j,n,ierr; 874 875 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 876 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 877 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 878 879 idx = a->j; 880 v = a->a; 881 ii = a->i; 882 883 for ( i=0; i<mbs; i++ ) { 884 n = ii[1] - ii[0]; ii++; 885 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 886 for ( j=0; j<n; j++ ) { 887 xb = x + 4*(*idx++); 888 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 889 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 890 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 891 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 892 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 893 v += 16; 894 } 895 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 896 z += 4; y += 4; 897 } 898 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 899 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 900 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 901 PLogFlops(32*a->nz); 902 return 0; 903 } 904 905 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 906 { 907 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 908 Scalar *xg,*yg,*zg; 909 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 910 register Scalar x1,x2,x3,x4,x5; 911 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 912 913 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 914 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 915 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 916 917 idx = a->j; 918 v = a->a; 919 ii = a->i; 920 921 for ( i=0; i<mbs; i++ ) { 922 n = ii[1] - ii[0]; ii++; 923 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 924 for ( j=0; j<n; j++ ) { 925 xb = x + 5*(*idx++); 926 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 927 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 928 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 929 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 930 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 931 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 932 v += 25; 933 } 934 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 935 z += 5; y += 5; 936 } 937 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 938 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 939 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 940 PLogFlops(50*a->nz); 941 return 0; 942 } 943 944 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 945 { 946 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 947 Scalar *xg,*zg; 948 register Scalar *x,*z,*v,*xb; 949 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 950 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 951 952 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 953 954 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 955 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 956 957 idx = a->j; 958 v = a->a; 959 ii = a->i; 960 961 962 if (!a->mult_work) { 963 k = PetscMax(a->m,a->n); 964 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 965 } 966 work = a->mult_work; 967 for ( i=0; i<mbs; i++ ) { 968 n = ii[1] - ii[0]; ii++; 969 ncols = n*bs; 970 workt = work; 971 for ( j=0; j<n; j++ ) { 972 xb = x + bs*(*idx++); 973 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 974 workt += bs; 975 } 976 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 977 v += n*bs2; 978 z += bs; 979 } 980 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 981 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 982 PLogFlops(2*a->nz*bs2 ); 983 return 0; 984 } 985 986 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 987 { 988 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 989 Scalar *xg,*zg,*zb; 990 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 991 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 992 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 993 994 995 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 996 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 997 PetscMemzero(z,N*sizeof(Scalar)); 998 999 idx = a->j; 1000 v = a->a; 1001 ii = a->i; 1002 1003 switch (bs) { 1004 case 1: 1005 for ( i=0; i<mbs; i++ ) { 1006 n = ii[1] - ii[0]; ii++; 1007 xb = x + i; x1 = xb[0]; 1008 ib = idx + ai[i]; 1009 for ( j=0; j<n; j++ ) { 1010 rval = ib[j]; 1011 z[rval] += *v++ * x1; 1012 } 1013 } 1014 break; 1015 case 2: 1016 for ( i=0; i<mbs; i++ ) { 1017 n = ii[1] - ii[0]; ii++; 1018 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1019 ib = idx + ai[i]; 1020 for ( j=0; j<n; j++ ) { 1021 rval = ib[j]*2; 1022 z[rval++] += v[0]*x1 + v[1]*x2; 1023 z[rval++] += v[2]*x1 + v[3]*x2; 1024 v += 4; 1025 } 1026 } 1027 break; 1028 case 3: 1029 for ( i=0; i<mbs; i++ ) { 1030 n = ii[1] - ii[0]; ii++; 1031 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1032 ib = idx + ai[i]; 1033 for ( j=0; j<n; j++ ) { 1034 rval = ib[j]*3; 1035 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1036 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1037 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1038 v += 9; 1039 } 1040 } 1041 break; 1042 case 4: 1043 for ( i=0; i<mbs; i++ ) { 1044 n = ii[1] - ii[0]; ii++; 1045 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1046 ib = idx + ai[i]; 1047 for ( j=0; j<n; j++ ) { 1048 rval = ib[j]*4; 1049 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1050 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1051 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1052 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1053 v += 16; 1054 } 1055 } 1056 break; 1057 case 5: 1058 for ( i=0; i<mbs; i++ ) { 1059 n = ii[1] - ii[0]; ii++; 1060 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1061 x4 = xb[3]; x5 = xb[4]; 1062 ib = idx + ai[i]; 1063 for ( j=0; j<n; j++ ) { 1064 rval = ib[j]*5; 1065 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1066 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1067 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1068 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1069 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1070 v += 25; 1071 } 1072 } 1073 break; 1074 /* block sizes larger then 5 by 5 are handled by BLAS */ 1075 default: { 1076 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1077 if (!a->mult_work) { 1078 k = PetscMax(a->m,a->n); 1079 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1080 CHKPTRQ(a->mult_work); 1081 } 1082 work = a->mult_work; 1083 for ( i=0; i<mbs; i++ ) { 1084 n = ii[1] - ii[0]; ii++; 1085 ncols = n*bs; 1086 PetscMemzero(work,ncols*sizeof(Scalar)); 1087 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1088 v += n*bs2; 1089 x += bs; 1090 workt = work; 1091 for ( j=0; j<n; j++ ) { 1092 zb = z + bs*(*idx++); 1093 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1094 workt += bs; 1095 } 1096 } 1097 } 1098 } 1099 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1100 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1101 PLogFlops(2*a->nz*a->bs2 - a->n); 1102 return 0; 1103 } 1104 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1105 { 1106 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1107 Scalar *xg,*zg,*zb; 1108 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1109 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1110 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1111 1112 1113 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1114 else PetscMemzero(z,N*sizeof(Scalar)); 1115 1116 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 1117 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 1118 1119 1120 idx = a->j; 1121 v = a->a; 1122 ii = a->i; 1123 1124 switch (bs) { 1125 case 1: 1126 for ( i=0; i<mbs; i++ ) { 1127 n = ii[1] - ii[0]; ii++; 1128 xb = x + i; x1 = xb[0]; 1129 ib = idx + ai[i]; 1130 for ( j=0; j<n; j++ ) { 1131 rval = ib[j]; 1132 z[rval] += *v++ * x1; 1133 } 1134 } 1135 break; 1136 case 2: 1137 for ( i=0; i<mbs; i++ ) { 1138 n = ii[1] - ii[0]; ii++; 1139 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1140 ib = idx + ai[i]; 1141 for ( j=0; j<n; j++ ) { 1142 rval = ib[j]*2; 1143 z[rval++] += v[0]*x1 + v[1]*x2; 1144 z[rval++] += v[2]*x1 + v[3]*x2; 1145 v += 4; 1146 } 1147 } 1148 break; 1149 case 3: 1150 for ( i=0; i<mbs; i++ ) { 1151 n = ii[1] - ii[0]; ii++; 1152 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1153 ib = idx + ai[i]; 1154 for ( j=0; j<n; j++ ) { 1155 rval = ib[j]*3; 1156 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1157 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1158 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1159 v += 9; 1160 } 1161 } 1162 break; 1163 case 4: 1164 for ( i=0; i<mbs; i++ ) { 1165 n = ii[1] - ii[0]; ii++; 1166 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1167 ib = idx + ai[i]; 1168 for ( j=0; j<n; j++ ) { 1169 rval = ib[j]*4; 1170 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1171 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1172 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1173 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1174 v += 16; 1175 } 1176 } 1177 break; 1178 case 5: 1179 for ( i=0; i<mbs; i++ ) { 1180 n = ii[1] - ii[0]; ii++; 1181 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1182 x4 = xb[3]; x5 = xb[4]; 1183 ib = idx + ai[i]; 1184 for ( j=0; j<n; j++ ) { 1185 rval = ib[j]*5; 1186 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1187 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1188 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1189 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1190 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1191 v += 25; 1192 } 1193 } 1194 break; 1195 /* block sizes larger then 5 by 5 are handled by BLAS */ 1196 default: { 1197 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1198 if (!a->mult_work) { 1199 k = PetscMax(a->m,a->n); 1200 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1201 CHKPTRQ(a->mult_work); 1202 } 1203 work = a->mult_work; 1204 for ( i=0; i<mbs; i++ ) { 1205 n = ii[1] - ii[0]; ii++; 1206 ncols = n*bs; 1207 PetscMemzero(work,ncols*sizeof(Scalar)); 1208 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1209 v += n*bs2; 1210 x += bs; 1211 workt = work; 1212 for ( j=0; j<n; j++ ) { 1213 zb = z + bs*(*idx++); 1214 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1215 workt += bs; 1216 } 1217 } 1218 } 1219 } 1220 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1221 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1222 PLogFlops(2*a->nz*a->bs2); 1223 return 0; 1224 } 1225 1226 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 1227 { 1228 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1229 if (nz) *nz = a->bs2*a->nz; 1230 if (nza) *nza = a->maxnz; 1231 if (mem) *mem = (int)A->mem; 1232 return 0; 1233 } 1234 1235 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1236 { 1237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1238 1239 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 1240 1241 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1242 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1243 (a->nz != b->nz)) { 1244 *flg = PETSC_FALSE; return 0; 1245 } 1246 1247 /* if the a->i are the same */ 1248 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1249 *flg = PETSC_FALSE; return 0; 1250 } 1251 1252 /* if a->j are the same */ 1253 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1254 *flg = PETSC_FALSE; return 0; 1255 } 1256 1257 /* if a->a are the same */ 1258 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1259 *flg = PETSC_FALSE; return 0; 1260 } 1261 *flg = PETSC_TRUE; 1262 return 0; 1263 1264 } 1265 1266 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1267 { 1268 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1269 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1270 Scalar *x, zero = 0.0,*aa,*aa_j; 1271 1272 bs = a->bs; 1273 aa = a->a; 1274 ai = a->i; 1275 aj = a->j; 1276 ambs = a->mbs; 1277 bs2 = a->bs2; 1278 1279 VecSet(&zero,v); 1280 VecGetArray(v,&x); VecGetLocalSize(v,&n); 1281 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 1282 for ( i=0; i<ambs; i++ ) { 1283 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1284 if (aj[j] == i) { 1285 row = i*bs; 1286 aa_j = aa+j*bs2; 1287 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1288 break; 1289 } 1290 } 1291 } 1292 return 0; 1293 } 1294 1295 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1296 { 1297 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1298 Scalar *l,*r,x,*v,*aa,*li,*ri; 1299 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1300 1301 ai = a->i; 1302 aj = a->j; 1303 aa = a->a; 1304 m = a->m; 1305 n = a->n; 1306 bs = a->bs; 1307 mbs = a->mbs; 1308 bs2 = a->bs2; 1309 if (ll) { 1310 VecGetArray(ll,&l); VecGetSize(ll,&lm); 1311 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 1312 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1313 M = ai[i+1] - ai[i]; 1314 li = l + i*bs; 1315 v = aa + bs2*ai[i]; 1316 for ( j=0; j<M; j++ ) { /* for each block */ 1317 for ( k=0; k<bs2; k++ ) { 1318 (*v++) *= li[k%bs]; 1319 } 1320 } 1321 } 1322 } 1323 1324 if (rr) { 1325 VecGetArray(rr,&r); VecGetSize(rr,&rn); 1326 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 1327 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1328 M = ai[i+1] - ai[i]; 1329 v = aa + bs2*ai[i]; 1330 for ( j=0; j<M; j++ ) { /* for each block */ 1331 ri = r + bs*aj[ai[i]+j]; 1332 for ( k=0; k<bs; k++ ) { 1333 x = ri[k]; 1334 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1335 } 1336 } 1337 } 1338 } 1339 return 0; 1340 } 1341 1342 1343 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1344 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1345 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1346 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1347 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1348 1349 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1350 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1351 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1352 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1353 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1354 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1355 1356 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1357 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1358 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1359 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1360 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1361 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1362 1363 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1364 { 1365 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1366 Scalar *v = a->a; 1367 double sum = 0.0; 1368 int i,nz=a->nz,bs2=a->bs2; 1369 1370 if (type == NORM_FROBENIUS) { 1371 for (i=0; i< bs2*nz; i++ ) { 1372 #if defined(PETSC_COMPLEX) 1373 sum += real(conj(*v)*(*v)); v++; 1374 #else 1375 sum += (*v)*(*v); v++; 1376 #endif 1377 } 1378 *norm = sqrt(sum); 1379 } 1380 else { 1381 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 1382 } 1383 return 0; 1384 } 1385 1386 /* 1387 note: This can only work for identity for row and col. It would 1388 be good to check this and otherwise generate an error. 1389 */ 1390 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1391 { 1392 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1393 Mat outA; 1394 int ierr; 1395 1396 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 1397 1398 outA = inA; 1399 inA->factor = FACTOR_LU; 1400 a->row = row; 1401 a->col = col; 1402 1403 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1404 1405 if (!a->diag) { 1406 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1407 } 1408 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1409 return 0; 1410 } 1411 1412 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1413 { 1414 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1415 int one = 1, totalnz = a->bs2*a->nz; 1416 BLscal_( &totalnz, alpha, a->a, &one ); 1417 PLogFlops(totalnz); 1418 return 0; 1419 } 1420 1421 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1422 { 1423 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1424 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1425 int *ai = a->i, *ailen = a->ilen; 1426 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1427 Scalar *ap, *aa = a->a, zero = 0.0; 1428 1429 for ( k=0; k<m; k++ ) { /* loop over rows */ 1430 row = im[k]; brow = row/bs; 1431 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 1432 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1433 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1434 nrow = ailen[brow]; 1435 for ( l=0; l<n; l++ ) { /* loop over columns */ 1436 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 1437 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1438 col = in[l] ; 1439 bcol = col/bs; 1440 cidx = col%bs; 1441 ridx = row%bs; 1442 high = nrow; 1443 low = 0; /* assume unsorted */ 1444 while (high-low > 5) { 1445 t = (low+high)/2; 1446 if (rp[t] > bcol) high = t; 1447 else low = t; 1448 } 1449 for ( i=low; i<high; i++ ) { 1450 if (rp[i] > bcol) break; 1451 if (rp[i] == bcol) { 1452 *v++ = ap[bs2*i+bs*cidx+ridx]; 1453 goto finished; 1454 } 1455 } 1456 *v++ = zero; 1457 finished:; 1458 } 1459 } 1460 return 0; 1461 } 1462 1463 /* -------------------------------------------------------------------*/ 1464 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1465 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1466 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1467 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1468 MatSolve_SeqBAIJ_N,0, 1469 0,0, 1470 MatLUFactor_SeqBAIJ,0, 1471 0, 1472 MatTranspose_SeqBAIJ, 1473 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1474 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1475 0,MatAssemblyEnd_SeqBAIJ, 1476 0, 1477 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,0, 1478 MatGetReordering_SeqBAIJ, 1479 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1480 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1481 MatILUFactorSymbolic_SeqBAIJ,0, 1482 0,0,/* MatConvert_SeqBAIJ */ 0, 1483 MatGetSubMatrix_SeqBAIJ,0, 1484 MatConvertSameType_SeqBAIJ,0,0, 1485 MatILUFactor_SeqBAIJ,0,0, 1486 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 1487 MatGetValues_SeqBAIJ,0, 1488 0,MatScale_SeqBAIJ, 1489 0}; 1490 1491 /*@C 1492 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1493 compressed row) format. For good matrix assembly performance the 1494 user should preallocate the matrix storage by setting the parameter nz 1495 (or nzz). By setting these parameters accurately, performance can be 1496 increased by more than a factor of 50. 1497 1498 Input Parameters: 1499 . comm - MPI communicator, set to MPI_COMM_SELF 1500 . bs - size of block 1501 . m - number of rows 1502 . n - number of columns 1503 . nz - number of block nonzeros per block row (same for all rows) 1504 . nzz - number of block nonzeros per block row or PETSC_NULL 1505 (possibly different for each row) 1506 1507 Output Parameter: 1508 . A - the matrix 1509 1510 Notes: 1511 The block AIJ format is fully compatible with standard Fortran 77 1512 storage. That is, the stored row and column indices can begin at 1513 either one (as in Fortran) or zero. See the users' manual for details. 1514 1515 Specify the preallocated storage with either nz or nnz (not both). 1516 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1517 allocation. For additional details, see the users manual chapter on 1518 matrices and the file $(PETSC_DIR)/Performance. 1519 1520 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1521 @*/ 1522 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1523 { 1524 Mat B; 1525 Mat_SeqBAIJ *b; 1526 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 1527 1528 if (mbs*bs!=m || nbs*bs!=n) 1529 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 1530 1531 *A = 0; 1532 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1533 PLogObjectCreate(B); 1534 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1535 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1536 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1537 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 1538 if (!flg) { 1539 switch (bs) { 1540 case 1: 1541 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1542 B->ops.solve = MatSolve_SeqBAIJ_1; 1543 B->ops.mult = MatMult_SeqBAIJ_1; 1544 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 1545 break; 1546 case 2: 1547 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1548 B->ops.solve = MatSolve_SeqBAIJ_2; 1549 B->ops.mult = MatMult_SeqBAIJ_2; 1550 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 1551 break; 1552 case 3: 1553 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1554 B->ops.solve = MatSolve_SeqBAIJ_3; 1555 B->ops.mult = MatMult_SeqBAIJ_3; 1556 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 1557 break; 1558 case 4: 1559 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1560 B->ops.solve = MatSolve_SeqBAIJ_4; 1561 B->ops.mult = MatMult_SeqBAIJ_4; 1562 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 1563 break; 1564 case 5: 1565 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1566 B->ops.solve = MatSolve_SeqBAIJ_5; 1567 B->ops.mult = MatMult_SeqBAIJ_5; 1568 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 1569 break; 1570 } 1571 } 1572 B->destroy = MatDestroy_SeqBAIJ; 1573 B->view = MatView_SeqBAIJ; 1574 B->factor = 0; 1575 B->lupivotthreshold = 1.0; 1576 b->row = 0; 1577 b->col = 0; 1578 b->reallocs = 0; 1579 1580 b->m = m; B->m = m; B->M = m; 1581 b->n = n; B->n = n; B->N = n; 1582 b->mbs = mbs; 1583 b->nbs = nbs; 1584 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1585 if (nnz == PETSC_NULL) { 1586 if (nz == PETSC_DEFAULT) nz = 5; 1587 else if (nz <= 0) nz = 1; 1588 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1589 nz = nz*mbs; 1590 } 1591 else { 1592 nz = 0; 1593 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1594 } 1595 1596 /* allocate the matrix space */ 1597 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1598 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1599 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1600 b->j = (int *) (b->a + nz*bs2); 1601 PetscMemzero(b->j,nz*sizeof(int)); 1602 b->i = b->j + nz; 1603 b->singlemalloc = 1; 1604 1605 b->i[0] = 0; 1606 for (i=1; i<mbs+1; i++) { 1607 b->i[i] = b->i[i-1] + b->imax[i-1]; 1608 } 1609 1610 /* b->ilen will count nonzeros in each block row so far. */ 1611 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1612 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1613 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1614 1615 b->bs = bs; 1616 b->bs2 = bs2; 1617 b->mbs = mbs; 1618 b->nz = 0; 1619 b->maxnz = nz*bs2; 1620 b->sorted = 0; 1621 b->roworiented = 1; 1622 b->nonew = 0; 1623 b->diag = 0; 1624 b->solve_work = 0; 1625 b->mult_work = 0; 1626 b->spptr = 0; 1627 *A = B; 1628 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1629 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1630 return 0; 1631 } 1632 1633 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1634 { 1635 Mat C; 1636 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1637 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1638 1639 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1640 1641 *B = 0; 1642 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1643 PLogObjectCreate(C); 1644 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1645 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1646 C->destroy = MatDestroy_SeqBAIJ; 1647 C->view = MatView_SeqBAIJ; 1648 C->factor = A->factor; 1649 c->row = 0; 1650 c->col = 0; 1651 C->assembled = PETSC_TRUE; 1652 1653 c->m = C->m = a->m; 1654 c->n = C->n = a->n; 1655 C->M = a->m; 1656 C->N = a->n; 1657 1658 c->bs = a->bs; 1659 c->bs2 = a->bs2; 1660 c->mbs = a->mbs; 1661 c->nbs = a->nbs; 1662 1663 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1664 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1665 for ( i=0; i<mbs; i++ ) { 1666 c->imax[i] = a->imax[i]; 1667 c->ilen[i] = a->ilen[i]; 1668 } 1669 1670 /* allocate the matrix space */ 1671 c->singlemalloc = 1; 1672 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1673 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1674 c->j = (int *) (c->a + nz*bs2); 1675 c->i = c->j + nz; 1676 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1677 if (mbs > 0) { 1678 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1679 if (cpvalues == COPY_VALUES) { 1680 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1681 } 1682 } 1683 1684 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1685 c->sorted = a->sorted; 1686 c->roworiented = a->roworiented; 1687 c->nonew = a->nonew; 1688 1689 if (a->diag) { 1690 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1691 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1692 for ( i=0; i<mbs; i++ ) { 1693 c->diag[i] = a->diag[i]; 1694 } 1695 } 1696 else c->diag = 0; 1697 c->nz = a->nz; 1698 c->maxnz = a->maxnz; 1699 c->solve_work = 0; 1700 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1701 c->mult_work = 0; 1702 *B = C; 1703 return 0; 1704 } 1705 1706 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1707 { 1708 Mat_SeqBAIJ *a; 1709 Mat B; 1710 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1711 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1712 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1713 int *masked, nmask,tmp,bs2,ishift; 1714 Scalar *aa; 1715 MPI_Comm comm = ((PetscObject) viewer)->comm; 1716 1717 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1718 bs2 = bs*bs; 1719 1720 MPI_Comm_size(comm,&size); 1721 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1722 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1723 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1724 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1725 M = header[1]; N = header[2]; nz = header[3]; 1726 1727 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1728 1729 /* 1730 This code adds extra rows to make sure the number of rows is 1731 divisible by the blocksize 1732 */ 1733 mbs = M/bs; 1734 extra_rows = bs - M + bs*(mbs); 1735 if (extra_rows == bs) extra_rows = 0; 1736 else mbs++; 1737 if (extra_rows) { 1738 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize"); 1739 } 1740 1741 /* read in row lengths */ 1742 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1743 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1744 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1745 1746 /* read in column indices */ 1747 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1748 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1749 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1750 1751 /* loop over row lengths determining block row lengths */ 1752 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1753 PetscMemzero(browlengths,mbs*sizeof(int)); 1754 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1755 PetscMemzero(mask,mbs*sizeof(int)); 1756 masked = mask + mbs; 1757 rowcount = 0; nzcount = 0; 1758 for ( i=0; i<mbs; i++ ) { 1759 nmask = 0; 1760 for ( j=0; j<bs; j++ ) { 1761 kmax = rowlengths[rowcount]; 1762 for ( k=0; k<kmax; k++ ) { 1763 tmp = jj[nzcount++]/bs; 1764 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1765 } 1766 rowcount++; 1767 } 1768 browlengths[i] += nmask; 1769 /* zero out the mask elements we set */ 1770 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1771 } 1772 1773 /* create our matrix */ 1774 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1775 CHKERRQ(ierr); 1776 B = *A; 1777 a = (Mat_SeqBAIJ *) B->data; 1778 1779 /* set matrix "i" values */ 1780 a->i[0] = 0; 1781 for ( i=1; i<= mbs; i++ ) { 1782 a->i[i] = a->i[i-1] + browlengths[i-1]; 1783 a->ilen[i-1] = browlengths[i-1]; 1784 } 1785 a->nz = 0; 1786 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1787 1788 /* read in nonzero values */ 1789 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1790 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 1791 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1792 1793 /* set "a" and "j" values into matrix */ 1794 nzcount = 0; jcount = 0; 1795 for ( i=0; i<mbs; i++ ) { 1796 nzcountb = nzcount; 1797 nmask = 0; 1798 for ( j=0; j<bs; j++ ) { 1799 kmax = rowlengths[i*bs+j]; 1800 for ( k=0; k<kmax; k++ ) { 1801 tmp = jj[nzcount++]/bs; 1802 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1803 } 1804 rowcount++; 1805 } 1806 /* sort the masked values */ 1807 PetscSortInt(nmask,masked); 1808 1809 /* set "j" values into matrix */ 1810 maskcount = 1; 1811 for ( j=0; j<nmask; j++ ) { 1812 a->j[jcount++] = masked[j]; 1813 mask[masked[j]] = maskcount++; 1814 } 1815 /* set "a" values into matrix */ 1816 ishift = bs2*a->i[i]; 1817 for ( j=0; j<bs; j++ ) { 1818 kmax = rowlengths[i*bs+j]; 1819 for ( k=0; k<kmax; k++ ) { 1820 tmp = jj[nzcountb]/bs ; 1821 block = mask[tmp] - 1; 1822 point = jj[nzcountb] - bs*tmp; 1823 idx = ishift + bs2*block + j + bs*point; 1824 a->a[idx] = aa[nzcountb++]; 1825 } 1826 } 1827 /* zero out the mask elements we set */ 1828 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1829 } 1830 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 1831 1832 PetscFree(rowlengths); 1833 PetscFree(browlengths); 1834 PetscFree(aa); 1835 PetscFree(jj); 1836 PetscFree(mask); 1837 1838 B->assembled = PETSC_TRUE; 1839 1840 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 1841 if (flg) { 1842 Viewer tviewer; 1843 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1844 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 1845 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1846 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1847 } 1848 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 1849 if (flg) { 1850 Viewer tviewer; 1851 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1852 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 1853 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1854 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1855 } 1856 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 1857 if (flg) { 1858 Viewer tviewer; 1859 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1860 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1861 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1862 } 1863 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 1864 if (flg) { 1865 Viewer tviewer; 1866 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 1867 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 1868 ierr = MatView(B,tviewer); CHKERRQ(ierr); 1869 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1870 } 1871 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 1872 if (flg) { 1873 Viewer tviewer; 1874 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 1875 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 1876 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 1877 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 1878 } 1879 return 0; 1880 } 1881 1882 1883 1884