1 /*$Id: sbaij.c,v 1.36 2000/10/12 18:19:32 hzhang Exp hzhang $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 #include "src/mat/impls/sbaij/seq/sbaij.h" 12 13 #define CHUNKSIZE 10 14 15 /* 16 Checks for missing diagonals 17 */ 18 #undef __FUNC__ 19 #define __FUNC__ "MatMissingDiagonal_SeqSBAIJ" 20 int MatMissingDiagonal_SeqSBAIJ(Mat A) 21 { 22 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 23 int *diag,*jj = a->j,i,ierr; 24 25 PetscFunctionBegin; 26 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 27 diag = a->diag; 28 for (i=0; i<a->mbs; i++) { 29 if (jj[diag[i]] != i) { 30 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 31 } 32 } 33 PetscFunctionReturn(0); 34 } 35 36 #undef __FUNC__ 37 #define __FUNC__ "MatMarkDiagonal_SeqSBAIJ" 38 int MatMarkDiagonal_SeqSBAIJ(Mat A) 39 { 40 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 41 int i,j,*diag,m = a->mbs; 42 43 PetscFunctionBegin; 44 if (a->diag) PetscFunctionReturn(0); 45 46 diag = (int*)PetscMalloc((m+1)*sizeof(int));CHKPTRQ(diag); 47 PLogObjectMemory(A,(m+1)*sizeof(int)); 48 for (i=0; i<m; i++) { 49 diag[i] = a->i[i+1]; 50 for (j=a->i[i]; j<a->i[i+1]; j++) { 51 if (a->j[j] == i) { 52 diag[i] = j; 53 break; 54 } 55 } 56 } 57 a->diag = diag; 58 PetscFunctionReturn(0); 59 } 60 61 62 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 63 64 #undef __FUNC__ 65 #define __FUNC__ "MatGetRowIJ_SeqSBAIJ" 66 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 67 { 68 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 69 70 PetscFunctionBegin; 71 if (ia) { 72 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 73 } 74 *nn = a->mbs; 75 PetscFunctionReturn(0); 76 } 77 78 #undef __FUNC__ 79 #define __FUNC__ "MatRestoreRowIJ_SeqSBAIJ" 80 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 81 { 82 PetscFunctionBegin; 83 if (!ia) PetscFunctionReturn(0); 84 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 85 /* PetscFunctionReturn(0); */ 86 } 87 88 #undef __FUNC__ 89 #define __FUNC__ "MatGetBlockSize_SeqSBAIJ" 90 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 91 { 92 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 93 94 PetscFunctionBegin; 95 *bs = sbaij->bs; 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNC__ 100 #define __FUNC__ "MatDestroy_SeqSBAIJ" 101 int MatDestroy_SeqSBAIJ(Mat A) 102 { 103 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 104 int ierr; 105 106 PetscFunctionBegin; 107 if (A->mapping) { 108 ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr); 109 } 110 if (A->bmapping) { 111 ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr); 112 } 113 if (A->rmap) { 114 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 115 } 116 if (A->cmap) { 117 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 118 } 119 #if defined(PETSC_USE_LOG) 120 PLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 121 #endif 122 ierr = PetscFree(a->a);CHKERRQ(ierr); 123 if (!a->singlemalloc) { 124 ierr = PetscFree(a->i);CHKERRQ(ierr); 125 ierr = PetscFree(a->j);CHKERRQ(ierr); 126 } 127 if (a->row) { 128 ierr = ISDestroy(a->row);CHKERRQ(ierr); 129 } 130 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 131 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 132 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 133 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 134 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 135 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 136 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 137 138 if (a->inew){ 139 ierr = PetscFree(a->inew);CHKERRQ(ierr); 140 a->inew = 0; 141 } 142 ierr = PetscFree(a);CHKERRQ(ierr); 143 PLogObjectDestroy(A); 144 PetscHeaderDestroy(A); 145 PetscFunctionReturn(0); 146 } 147 148 #undef __FUNC__ 149 #define __FUNC__ "MatSetOption_SeqSBAIJ" 150 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 151 { 152 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 153 154 PetscFunctionBegin; 155 if (op == MAT_ROW_ORIENTED) a->roworiented = PETSC_TRUE; 156 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = PETSC_FALSE; 157 else if (op == MAT_COLUMNS_SORTED) a->sorted = PETSC_TRUE; 158 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = PETSC_FALSE; 159 else if (op == MAT_KEEP_ZEROED_ROWS) a->keepzeroedrows = PETSC_TRUE; 160 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 161 else if (op == MAT_NEW_NONZERO_LOCATION_ERR) a->nonew = -1; 162 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR) a->nonew = -2; 163 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 164 else if (op == MAT_ROWS_SORTED || 165 op == MAT_ROWS_UNSORTED || 166 op == MAT_SYMMETRIC || 167 op == MAT_STRUCTURALLY_SYMMETRIC || 168 op == MAT_YES_NEW_DIAGONALS || 169 op == MAT_IGNORE_OFF_PROC_ENTRIES || 170 op == MAT_USE_HASH_TABLE) { 171 PLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 172 } else if (op == MAT_NO_NEW_DIAGONALS) { 173 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 174 } else { 175 SETERRQ(PETSC_ERR_SUP,"unknown option"); 176 } 177 PetscFunctionReturn(0); 178 } 179 180 #undef __FUNC__ 181 #define __FUNC__ "MatGetOwnershipRange_SeqSBAIJ" 182 int MatGetOwnershipRange_SeqSBAIJ(Mat A,int *m,int *n) 183 { 184 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 185 186 PetscFunctionBegin; 187 *m = 0; 188 *n = A->m; 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNC__ 193 #define __FUNC__ "MatGetRow_SeqSBAIJ" 194 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,Scalar **v) 195 { 196 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 197 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 198 MatScalar *aa,*aa_i; 199 Scalar *v_i; 200 201 PetscFunctionBegin; 202 bs = a->bs; 203 ai = a->i; 204 aj = a->j; 205 aa = a->a; 206 bs2 = a->bs2; 207 208 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 209 210 bn = row/bs; /* Block number */ 211 bp = row % bs; /* Block position */ 212 M = ai[bn+1] - ai[bn]; 213 *ncols = bs*M; 214 215 if (v) { 216 *v = 0; 217 if (*ncols) { 218 *v = (Scalar*)PetscMalloc((*ncols+row)*sizeof(Scalar));CHKPTRQ(*v); 219 for (i=0; i<M; i++) { /* for each block in the block row */ 220 v_i = *v + i*bs; 221 aa_i = aa + bs2*(ai[bn] + i); 222 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 223 } 224 } 225 } 226 227 if (cols) { 228 *cols = 0; 229 if (*ncols) { 230 *cols = (int*)PetscMalloc((*ncols+row)*sizeof(int));CHKPTRQ(*cols); 231 for (i=0; i<M; i++) { /* for each block in the block row */ 232 cols_i = *cols + i*bs; 233 itmp = bs*aj[ai[bn] + i]; 234 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 235 } 236 } 237 } 238 239 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 240 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 241 #ifdef column_search 242 v_i = *v + M*bs; 243 cols_i = *cols + M*bs; 244 for (i=0; i<bn; i++){ /* for each block row */ 245 M = ai[i+1] - ai[i]; 246 for (j=0; j<M; j++){ 247 itmp = aj[ai[i] + j]; /* block column value */ 248 if (itmp == bn){ 249 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 250 for (k=0; k<bs; k++) { 251 *cols_i++ = i*bs+k; 252 *v_i++ = aa_i[k]; 253 } 254 *ncols += bs; 255 break; 256 } 257 } 258 } 259 #endif 260 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNC__ 265 #define __FUNC__ "MatRestoreRow_SeqSBAIJ" 266 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 267 { 268 int ierr; 269 270 PetscFunctionBegin; 271 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 272 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNC__ 277 #define __FUNC__ "MatTranspose_SeqSBAIJ" 278 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 279 { 280 PetscFunctionBegin; 281 SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called"); 282 /* PetscFunctionReturn(0); */ 283 } 284 285 #undef __FUNC__ 286 #define __FUNC__ "MatView_SeqSBAIJ_Binary" 287 static int MatView_SeqSBAIJ_Binary(Mat A,Viewer viewer) 288 { 289 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 290 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 291 Scalar *aa; 292 FILE *file; 293 294 PetscFunctionBegin; 295 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 296 col_lens = (int*)PetscMalloc((4+A->m)*sizeof(int));CHKPTRQ(col_lens); 297 col_lens[0] = MAT_COOKIE; 298 299 col_lens[1] = A->m; 300 col_lens[2] = A->m; 301 col_lens[3] = a->s_nz*bs2; 302 303 /* store lengths of each row and write (including header) to file */ 304 count = 0; 305 for (i=0; i<a->mbs; i++) { 306 for (j=0; j<bs; j++) { 307 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 308 } 309 } 310 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 311 ierr = PetscFree(col_lens);CHKERRQ(ierr); 312 313 /* store column indices (zero start index) */ 314 jj = (int*)PetscMalloc((a->s_nz+1)*bs2*sizeof(int));CHKPTRQ(jj); 315 count = 0; 316 for (i=0; i<a->mbs; i++) { 317 for (j=0; j<bs; j++) { 318 for (k=a->i[i]; k<a->i[i+1]; k++) { 319 for (l=0; l<bs; l++) { 320 jj[count++] = bs*a->j[k] + l; 321 } 322 } 323 } 324 } 325 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 326 ierr = PetscFree(jj);CHKERRQ(ierr); 327 328 /* store nonzero values */ 329 aa = (Scalar*)PetscMalloc((a->s_nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa); 330 count = 0; 331 for (i=0; i<a->mbs; i++) { 332 for (j=0; j<bs; j++) { 333 for (k=a->i[i]; k<a->i[i+1]; k++) { 334 for (l=0; l<bs; l++) { 335 aa[count++] = a->a[bs2*k + l*bs + j]; 336 } 337 } 338 } 339 } 340 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 341 ierr = PetscFree(aa);CHKERRQ(ierr); 342 343 ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 344 if (file) { 345 fprintf(file,"-matload_block_size %d\n",a->bs); 346 } 347 PetscFunctionReturn(0); 348 } 349 350 #undef __FUNC__ 351 #define __FUNC__ "MatView_SeqSBAIJ_ASCII" 352 static int MatView_SeqSBAIJ_ASCII(Mat A,Viewer viewer) 353 { 354 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 355 int ierr,i,j,format,bs = a->bs,k,l,bs2=a->bs2; 356 char *outputname; 357 358 PetscFunctionBegin; 359 ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr); 360 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 361 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 362 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 363 } else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 364 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 365 } else if (format == VIEWER_FORMAT_ASCII_COMMON) { 366 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 367 for (i=0; i<a->mbs; i++) { 368 for (j=0; j<bs; j++) { 369 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 370 for (k=a->i[i]; k<a->i[i+1]; k++) { 371 for (l=0; l<bs; l++) { 372 #if defined(PETSC_USE_COMPLEX) 373 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 374 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 375 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 376 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 377 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 378 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 379 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 380 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 381 } 382 #else 383 if (a->a[bs2*k + l*bs + j] != 0.0) { 384 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 385 } 386 #endif 387 } 388 } 389 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 390 } 391 } 392 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 393 } else { 394 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 395 for (i=0; i<a->mbs; i++) { 396 for (j=0; j<bs; j++) { 397 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 398 for (k=a->i[i]; k<a->i[i+1]; k++) { 399 for (l=0; l<bs; l++) { 400 #if defined(PETSC_USE_COMPLEX) 401 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 402 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 403 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 404 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 405 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 406 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 407 } else { 408 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 409 } 410 #else 411 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 412 #endif 413 } 414 } 415 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 416 } 417 } 418 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 419 } 420 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNC__ 425 #define __FUNC__ "MatView_SeqSBAIJ_Draw_Zoom" 426 static int MatView_SeqSBAIJ_Draw_Zoom(Draw draw,void *Aa) 427 { 428 Mat A = (Mat) Aa; 429 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 430 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 431 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 432 MatScalar *aa; 433 MPI_Comm comm; 434 Viewer viewer; 435 436 PetscFunctionBegin; 437 /* 438 This is nasty. If this is called from an originally parallel matrix 439 then all processes call this,but only the first has the matrix so the 440 rest should return immediately. 441 */ 442 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 443 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 444 if (rank) PetscFunctionReturn(0); 445 446 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 447 448 ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 449 DrawString(draw, .3*(xl+xr), .3*(yl+yr), DRAW_BLACK, "symmetric"); 450 451 /* loop over matrix elements drawing boxes */ 452 color = DRAW_BLUE; 453 for (i=0,row=0; i<mbs; i++,row+=bs) { 454 for (j=a->i[i]; j<a->i[i+1]; j++) { 455 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 456 x_l = a->j[j]*bs; x_r = x_l + 1.0; 457 aa = a->a + j*bs2; 458 for (k=0; k<bs; k++) { 459 for (l=0; l<bs; l++) { 460 if (PetscRealPart(*aa++) >= 0.) continue; 461 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 462 } 463 } 464 } 465 } 466 color = DRAW_CYAN; 467 for (i=0,row=0; i<mbs; i++,row+=bs) { 468 for (j=a->i[i]; j<a->i[i+1]; j++) { 469 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 470 x_l = a->j[j]*bs; x_r = x_l + 1.0; 471 aa = a->a + j*bs2; 472 for (k=0; k<bs; k++) { 473 for (l=0; l<bs; l++) { 474 if (PetscRealPart(*aa++) != 0.) continue; 475 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 476 } 477 } 478 } 479 } 480 481 color = DRAW_RED; 482 for (i=0,row=0; i<mbs; i++,row+=bs) { 483 for (j=a->i[i]; j<a->i[i+1]; j++) { 484 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 485 x_l = a->j[j]*bs; x_r = x_l + 1.0; 486 aa = a->a + j*bs2; 487 for (k=0; k<bs; k++) { 488 for (l=0; l<bs; l++) { 489 if (PetscRealPart(*aa++) <= 0.) continue; 490 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 491 } 492 } 493 } 494 } 495 PetscFunctionReturn(0); 496 } 497 498 #undef __FUNC__ 499 #define __FUNC__ "MatView_SeqSBAIJ_Draw" 500 static int MatView_SeqSBAIJ_Draw(Mat A,Viewer viewer) 501 { 502 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 503 int ierr; 504 PetscReal xl,yl,xr,yr,w,h; 505 Draw draw; 506 PetscTruth isnull; 507 508 PetscFunctionBegin; 509 510 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 511 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 512 513 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 514 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 515 xr += w; yr += h; xl = -w; yl = -h; 516 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 517 ierr = DrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 518 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 522 #undef __FUNC__ 523 #define __FUNC__ "MatView_SeqSBAIJ" 524 int MatView_SeqSBAIJ(Mat A,Viewer viewer) 525 { 526 int ierr; 527 PetscTruth issocket,isascii,isbinary,isdraw; 528 529 PetscFunctionBegin; 530 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 531 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 532 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 533 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 534 if (issocket) { 535 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 536 } else if (isascii){ 537 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 538 } else if (isbinary) { 539 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 540 } else if (isdraw) { 541 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 542 } else { 543 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 544 } 545 PetscFunctionReturn(0); 546 } 547 548 549 #undef __FUNC__ 550 #define __FUNC__ "MatGetValues_SeqSBAIJ" 551 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 552 { 553 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 554 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 555 int *ai = a->i,*ailen = a->ilen; 556 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 557 MatScalar *ap,*aa = a->a,zero = 0.0; 558 559 PetscFunctionBegin; 560 for (k=0; k<m; k++) { /* loop over rows */ 561 row = im[k]; brow = row/bs; 562 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 563 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 564 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 565 nrow = ailen[brow]; 566 for (l=0; l<n; l++) { /* loop over columns */ 567 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 568 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 569 col = in[l] ; 570 bcol = col/bs; 571 cidx = col%bs; 572 ridx = row%bs; 573 high = nrow; 574 low = 0; /* assume unsorted */ 575 while (high-low > 5) { 576 t = (low+high)/2; 577 if (rp[t] > bcol) high = t; 578 else low = t; 579 } 580 for (i=low; i<high; i++) { 581 if (rp[i] > bcol) break; 582 if (rp[i] == bcol) { 583 *v++ = ap[bs2*i+bs*cidx+ridx]; 584 goto finished; 585 } 586 } 587 *v++ = zero; 588 finished:; 589 } 590 } 591 PetscFunctionReturn(0); 592 } 593 594 595 #undef __FUNC__ 596 #define __FUNC__ "MatSetValuesBlocked_SeqSBAIJ" 597 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 598 { 599 PetscFunctionBegin; 600 SETERRQ(1,"Function not yet written for SBAIJ format"); 601 /* PetscFunctionReturn(0); */ 602 } 603 604 #undef __FUNC__ 605 #define __FUNC__ "MatAssemblyEnd_SeqSBAIJ" 606 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 607 { 608 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 609 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 610 int m = A->m,*ip,N,*ailen = a->ilen; 611 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 612 MatScalar *aa = a->a,*ap; 613 614 PetscFunctionBegin; 615 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 616 617 if (m) rmax = ailen[0]; 618 for (i=1; i<mbs; i++) { 619 /* move each row back by the amount of empty slots (fshift) before it*/ 620 fshift += imax[i-1] - ailen[i-1]; 621 rmax = PetscMax(rmax,ailen[i]); 622 if (fshift) { 623 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 624 N = ailen[i]; 625 for (j=0; j<N; j++) { 626 ip[j-fshift] = ip[j]; 627 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 628 } 629 } 630 ai[i] = ai[i-1] + ailen[i-1]; 631 } 632 if (mbs) { 633 fshift += imax[mbs-1] - ailen[mbs-1]; 634 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 635 } 636 /* reset ilen and imax for each row */ 637 for (i=0; i<mbs; i++) { 638 ailen[i] = imax[i] = ai[i+1] - ai[i]; 639 } 640 a->s_nz = ai[mbs]; 641 642 /* diagonals may have moved, so kill the diagonal pointers */ 643 if (fshift && a->diag) { 644 ierr = PetscFree(a->diag);CHKERRQ(ierr); 645 PLogObjectMemory(A,-(m+1)*sizeof(int)); 646 a->diag = 0; 647 } 648 PLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 649 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 650 PLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 651 a->reallocs); 652 PLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 653 a->reallocs = 0; 654 A->info.nz_unneeded = (PetscReal)fshift*bs2; 655 656 PetscFunctionReturn(0); 657 } 658 659 /* 660 This function returns an array of flags which indicate the locations of contiguous 661 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 662 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 663 Assume: sizes should be long enough to hold all the values. 664 */ 665 #undef __FUNC__ 666 #define __FUNC__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 667 static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 668 { 669 int i,j,k,row; 670 PetscTruth flg; 671 672 PetscFunctionBegin; 673 for (i=0,j=0; i<n; j++) { 674 row = idx[i]; 675 if (row%bs!=0) { /* Not the begining of a block */ 676 sizes[j] = 1; 677 i++; 678 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 679 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 680 i++; 681 } else { /* Begining of the block, so check if the complete block exists */ 682 flg = PETSC_TRUE; 683 for (k=1; k<bs; k++) { 684 if (row+k != idx[i+k]) { /* break in the block */ 685 flg = PETSC_FALSE; 686 break; 687 } 688 } 689 if (flg == PETSC_TRUE) { /* No break in the bs */ 690 sizes[j] = bs; 691 i+= bs; 692 } else { 693 sizes[j] = 1; 694 i++; 695 } 696 } 697 } 698 *bs_max = j; 699 PetscFunctionReturn(0); 700 } 701 702 #undef __FUNC__ 703 #define __FUNC__ "MatZeroRows_SeqSBAIJ" 704 int MatZeroRows_SeqSBAIJ(Mat A,IS is,Scalar *diag) 705 { 706 Mat_SeqSBAIJ *sbaij=(Mat_SeqSBAIJ*)A->data; 707 int ierr,i,j,k,count,is_n,*is_idx,*rows; 708 int bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max; 709 Scalar zero = 0.0; 710 MatScalar *aa; 711 712 PetscFunctionBegin; 713 /* Make a copy of the IS and sort it */ 714 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 715 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 716 717 /* allocate memory for rows,sizes */ 718 rows = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows); 719 sizes = rows + is_n; 720 721 /* initialize copy IS values to rows, and sort them */ 722 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 723 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 724 if (sbaij->keepzeroedrows) { /* do not change nonzero structure */ 725 for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */ 726 bs_max = is_n; /* bs_max: num. of contiguous block row in the row */ 727 } else { 728 ierr = MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 729 } 730 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 731 732 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 733 row = rows[j]; /* row to be zeroed */ 734 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 735 count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */ 736 aa = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs); 737 if (sizes[i] == bs && !sbaij->keepzeroedrows) { 738 if (diag) { 739 if (sbaij->ilen[row/bs] > 0) { 740 sbaij->ilen[row/bs] = 1; 741 sbaij->j[sbaij->i[row/bs]] = row/bs; 742 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 743 } 744 /* Now insert all the diagoanl values for this bs */ 745 for (k=0; k<bs; k++) { 746 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 747 } 748 } else { /* (!diag) */ 749 sbaij->ilen[row/bs] = 0; 750 } /* end (!diag) */ 751 } else { /* (sizes[i] != bs), broken block */ 752 #if defined (PETSC_USE_DEBUG) 753 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 754 #endif 755 for (k=0; k<count; k++) { 756 aa[0] = zero; 757 aa+=bs; 758 } 759 if (diag) { 760 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 761 } 762 } 763 } 764 765 ierr = PetscFree(rows);CHKERRQ(ierr); 766 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 767 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 768 PetscFunctionReturn(0); 769 } 770 771 /* Only add/insert a(i,j) with i<=j (blocks). 772 Any a(i,j) with i>j input by user is ingored. 773 */ 774 775 #undef __FUNC__ 776 #define __FUNC__ "MatSetValues_SeqSBAIJ" 777 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 778 { 779 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 780 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 781 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 782 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 783 int ridx,cidx,bs2=a->bs2,ierr; 784 MatScalar *ap,value,*aa=a->a,*bap; 785 786 PetscFunctionBegin; 787 788 for (k=0; k<m; k++) { /* loop over added rows */ 789 row = im[k]; /* row number */ 790 brow = row/bs; /* block row number */ 791 if (row < 0) continue; 792 #if defined(PETSC_USE_BOPT_g) 793 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 794 #endif 795 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 796 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 797 rmax = imax[brow]; /* maximum space allocated for this row */ 798 nrow = ailen[brow]; /* actual length of this row */ 799 low = 0; 800 801 for (l=0; l<n; l++) { /* loop over added columns */ 802 if (in[l] < 0) continue; 803 #if defined(PETSC_USE_BOPT_g) 804 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 805 #endif 806 col = in[l]; 807 bcol = col/bs; /* block col number */ 808 809 if (brow <= bcol){ 810 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 811 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 812 /* element value a(k,l) */ 813 if (roworiented) { 814 value = v[l + k*n]; 815 } else { 816 value = v[k + l*m]; 817 } 818 819 /* move pointer bap to a(k,l) quickly and add/insert value */ 820 if (!sorted) low = 0; high = nrow; 821 while (high-low > 7) { 822 t = (low+high)/2; 823 if (rp[t] > bcol) high = t; 824 else low = t; 825 } 826 for (i=low; i<high; i++) { 827 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 828 if (rp[i] > bcol) break; 829 if (rp[i] == bcol) { 830 bap = ap + bs2*i + bs*cidx + ridx; 831 if (is == ADD_VALUES) *bap += value; 832 else *bap = value; 833 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 834 if (brow == bcol && ridx < cidx){ 835 bap = ap + bs2*i + bs*ridx + cidx; 836 if (is == ADD_VALUES) *bap += value; 837 else *bap = value; 838 } 839 goto noinsert1; 840 } 841 } 842 843 if (nonew == 1) goto noinsert1; 844 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 845 if (nrow >= rmax) { 846 /* there is no extra room in row, therefore enlarge */ 847 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 848 MatScalar *new_a; 849 850 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 851 852 /* Malloc new storage space */ 853 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 854 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); 855 new_j = (int*)(new_a + bs2*new_nz); 856 new_i = new_j + new_nz; 857 858 /* copy over old data into new slots */ 859 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 860 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 861 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 862 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 863 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 864 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 865 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 866 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 867 /* free up old matrix storage */ 868 ierr = PetscFree(a->a);CHKERRQ(ierr); 869 if (!a->singlemalloc) { 870 ierr = PetscFree(a->i);CHKERRQ(ierr); 871 ierr = PetscFree(a->j);CHKERRQ(ierr); 872 } 873 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 874 a->singlemalloc = PETSC_TRUE; 875 876 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 877 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 878 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 879 a->s_maxnz += bs2*CHUNKSIZE; 880 a->reallocs++; 881 a->s_nz++; 882 } 883 884 N = nrow++ - 1; 885 /* shift up all the later entries in this row */ 886 for (ii=N; ii>=i; ii--) { 887 rp[ii+1] = rp[ii]; 888 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 889 } 890 if (N>=i) { 891 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 892 } 893 rp[i] = bcol; 894 ap[bs2*i + bs*cidx + ridx] = value; 895 noinsert1:; 896 low = i; 897 /* } */ 898 } 899 } /* end of if .. if.. */ 900 } /* end of loop over added columns */ 901 ailen[brow] = nrow; 902 } /* end of loop over added rows */ 903 904 PetscFunctionReturn(0); 905 } 906 907 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*); 908 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal); 909 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 910 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 911 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 912 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 913 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 914 extern int MatScale_SeqSBAIJ(Scalar*,Mat); 915 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 916 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 917 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 918 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 919 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 920 extern int MatZeroEntries_SeqSBAIJ(Mat); 921 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 922 923 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 924 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 925 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 926 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 927 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 928 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 929 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 930 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 931 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 932 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 933 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 934 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 935 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 936 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 937 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 938 939 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 940 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 941 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 942 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 943 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 944 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 945 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 946 947 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 948 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 949 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 950 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 951 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 952 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 953 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 954 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 955 956 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 957 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 958 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 959 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 960 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 961 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 962 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 963 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 964 965 #ifdef HAVE_MatIncompleteCholeskyFactor 966 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 967 #undef __FUNC__ 968 #define __FUNC__ "MatIncompleteCholeskyFactor_SeqSBAIJ" 969 int MatIncompleteCholeskyFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level) 970 { 971 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 972 Mat outA; 973 int ierr; 974 PetscTruth row_identity,col_identity; 975 976 PetscFunctionBegin; 977 /* 978 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 979 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 980 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 981 if (!row_identity || !col_identity) { 982 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 983 } 984 */ 985 986 outA = inA; 987 inA->factor = FACTOR_LU; 988 989 if (!a->diag) { 990 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 991 } 992 /* 993 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 994 for ILU(0) factorization with natural ordering 995 */ 996 switch (a->bs) { 997 case 1: 998 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; /* 2? */ 999 PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1000 case 2: 1001 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1002 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1003 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1004 PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1005 break; 1006 case 3: 1007 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1008 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1009 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1010 PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1011 break; 1012 case 4: 1013 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1014 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1015 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1016 PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1017 break; 1018 case 5: 1019 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1020 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1021 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1022 PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1023 break; 1024 case 6: 1025 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1026 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1027 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1028 PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1029 break; 1030 case 7: 1031 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1032 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1033 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1034 PLogInfo(inA,"MatIncompleteCholeskyFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1035 break; 1036 default: 1037 a->row = row; 1038 a->icol = col; 1039 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1040 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1041 1042 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1043 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1044 PLogObjectParent(inA,a->icol); 1045 1046 if (!a->solve_work) { 1047 a->solve_work = (Scalar*)PetscMalloc((A->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1048 PLogObjectMemory(inA,(A->m+a->bs)*sizeof(Scalar)); 1049 } 1050 } 1051 1052 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1053 1054 PetscFunctionReturn(0); 1055 } 1056 #endif 1057 1058 #undef __FUNC__ 1059 #define __FUNC__ "MatPrintHelp_SeqSBAIJ" 1060 int MatPrintHelp_SeqSBAIJ(Mat A) 1061 { 1062 static PetscTruth called = PETSC_FALSE; 1063 MPI_Comm comm = A->comm; 1064 int ierr; 1065 1066 PetscFunctionBegin; 1067 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1068 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1069 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1070 PetscFunctionReturn(0); 1071 } 1072 1073 EXTERN_C_BEGIN 1074 #undef __FUNC__ 1075 #define __FUNC__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1076 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1077 { 1078 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1079 int i,nz,n; 1080 1081 PetscFunctionBegin; 1082 nz = baij->s_maxnz; 1083 n = mat->n; 1084 for (i=0; i<nz; i++) { 1085 baij->j[i] = indices[i]; 1086 } 1087 baij->s_nz = nz; 1088 for (i=0; i<n; i++) { 1089 baij->ilen[i] = baij->imax[i]; 1090 } 1091 1092 PetscFunctionReturn(0); 1093 } 1094 EXTERN_C_END 1095 1096 #undef __FUNC__ 1097 #define __FUNC__ "MatSeqSBAIJSetColumnIndices" 1098 /*@ 1099 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1100 in the matrix. 1101 1102 Input Parameters: 1103 + mat - the SeqSBAIJ matrix 1104 - indices - the column indices 1105 1106 Level: advanced 1107 1108 Notes: 1109 This can be called if you have precomputed the nonzero structure of the 1110 matrix and want to provide it to the matrix object to improve the performance 1111 of the MatSetValues() operation. 1112 1113 You MUST have set the correct numbers of nonzeros per row in the call to 1114 MatCreateSeqSBAIJ(). 1115 1116 MUST be called before any calls to MatSetValues() 1117 1118 .seealso: MatCreateSeqSBAIJ 1119 @*/ 1120 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1121 { 1122 int ierr,(*f)(Mat,int *); 1123 1124 PetscFunctionBegin; 1125 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1126 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr); 1127 if (f) { 1128 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1129 } else { 1130 SETERRQ(1,"Wrong type of matrix to set column indices"); 1131 } 1132 PetscFunctionReturn(0); 1133 } 1134 1135 /* -------------------------------------------------------------------*/ 1136 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1137 MatGetRow_SeqSBAIJ, 1138 MatRestoreRow_SeqSBAIJ, 1139 MatMult_SeqSBAIJ_N, 1140 MatMultAdd_SeqSBAIJ_N, 1141 MatMultTranspose_SeqSBAIJ, 1142 MatMultTransposeAdd_SeqSBAIJ, 1143 MatSolve_SeqSBAIJ_N, 1144 0, 1145 0, 1146 0, 1147 0, 1148 MatCholeskyFactor_SeqSBAIJ, 1149 0, 1150 MatTranspose_SeqSBAIJ, 1151 MatGetInfo_SeqSBAIJ, 1152 MatEqual_SeqSBAIJ, 1153 MatGetDiagonal_SeqSBAIJ, 1154 MatDiagonalScale_SeqSBAIJ, 1155 MatNorm_SeqSBAIJ, 1156 0, 1157 MatAssemblyEnd_SeqSBAIJ, 1158 0, 1159 MatSetOption_SeqSBAIJ, 1160 MatZeroEntries_SeqSBAIJ, 1161 MatZeroRows_SeqSBAIJ, 1162 0, 1163 0, 1164 MatCholeskyFactorSymbolic_SeqSBAIJ, 1165 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1166 0, 1167 0, 1168 MatGetOwnershipRange_SeqSBAIJ, 1169 0, 1170 MatIncompleteCholeskyFactorSymbolic_SeqSBAIJ, 1171 0, 1172 0, 1173 MatDuplicate_SeqSBAIJ, 1174 0, 1175 0, 1176 0, 1177 0, 1178 0, 1179 MatGetSubMatrices_SeqSBAIJ, 1180 MatIncreaseOverlap_SeqSBAIJ, 1181 MatGetValues_SeqSBAIJ, 1182 0, 1183 MatPrintHelp_SeqSBAIJ, 1184 MatScale_SeqSBAIJ, 1185 0, 1186 0, 1187 0, 1188 MatGetBlockSize_SeqSBAIJ, 1189 MatGetRowIJ_SeqSBAIJ, 1190 MatRestoreRowIJ_SeqSBAIJ, 1191 0, 1192 0, 1193 0, 1194 0, 1195 0, 1196 0, 1197 MatSetValuesBlocked_SeqSBAIJ, 1198 MatGetSubMatrix_SeqSBAIJ, 1199 0, 1200 0, 1201 MatGetMaps_Petsc, 1202 0, 1203 0, 1204 0, 1205 0, 1206 0, 1207 0, 1208 MatGetRowMax_SeqSBAIJ}; 1209 1210 EXTERN_C_BEGIN 1211 #undef __FUNC__ 1212 #define __FUNC__ "MatStoreValues_SeqSBAIJ" 1213 int MatStoreValues_SeqSBAIJ(Mat mat) 1214 { 1215 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1216 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1217 int ierr; 1218 1219 PetscFunctionBegin; 1220 if (aij->nonew != 1) { 1221 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1222 } 1223 1224 /* allocate space for values if not already there */ 1225 if (!aij->saved_values) { 1226 aij->saved_values = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values); 1227 } 1228 1229 /* copy values over */ 1230 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 EXTERN_C_END 1234 1235 EXTERN_C_BEGIN 1236 #undef __FUNC__ 1237 #define __FUNC__ "MatRetrieveValues_SeqSBAIJ" 1238 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1239 { 1240 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1241 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1242 1243 PetscFunctionBegin; 1244 if (aij->nonew != 1) { 1245 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1246 } 1247 if (!aij->saved_values) { 1248 SETERRQ(1,"Must call MatStoreValues(A);first"); 1249 } 1250 1251 /* copy values over */ 1252 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 EXTERN_C_END 1256 1257 EXTERN_C_BEGIN 1258 #undef __FUNC__ 1259 #define __FUNC__ "MatCreate_SeqSBAIJ" 1260 int MatCreate_SeqSBAIJ(Mat B) 1261 { 1262 Mat_SeqSBAIJ *b; 1263 int i,ierr,size; 1264 PetscTruth flg; 1265 int s_nz; 1266 1267 PetscFunctionBegin; 1268 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1269 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1270 1271 B->data = (void*)(b = PetscNew(Mat_SeqSBAIJ));CHKPTRQ(b); 1272 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1273 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1274 B->ops->destroy = MatDestroy_SeqSBAIJ; 1275 B->ops->view = MatView_SeqSBAIJ; 1276 B->factor = 0; 1277 B->lupivotthreshold = 1.0; 1278 B->mapping = 0; 1279 b->row = 0; 1280 b->icol = 0; 1281 b->reallocs = 0; 1282 b->saved_values = 0; 1283 1284 ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1285 ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1286 1287 b->sorted = PETSC_FALSE; 1288 b->roworiented = PETSC_TRUE; 1289 b->nonew = 0; 1290 b->diag = 0; 1291 b->solve_work = 0; 1292 b->mult_work = 0; 1293 b->spptr = 0; 1294 b->keepzeroedrows = PETSC_FALSE; 1295 1296 b->inew = 0; 1297 b->jnew = 0; 1298 b->anew = 0; 1299 b->a2anew = 0; 1300 b->permute = PETSC_FALSE; 1301 1302 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1303 "MatStoreValues_SeqSBAIJ", 1304 (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1306 "MatRetrieveValues_SeqSBAIJ", 1307 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1308 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1309 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1310 (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1311 PetscFunctionReturn(0); 1312 } 1313 EXTERN_C_END 1314 1315 #undef __FUNC__ 1316 #define __FUNC__ "MatSeqSBAIJSetNonzeroStructure" 1317 /*@C 1318 MatSeqSBAIJSetNonzeroStructure - Creates a sparse symmetric matrix in block AIJ (block 1319 compressed row) format. For good matrix assembly performance the 1320 user should preallocate the matrix storage by setting the parameter nz 1321 (or the array nnz). By setting these parameters accurately, performance 1322 during matrix assembly can be increased by more than a factor of 50. 1323 1324 Collective on Mat 1325 1326 Input Parameters: 1327 + A - the symmetric matrix 1328 . bs - size of block 1329 . nz - number of block nonzeros per block row (same for all rows) 1330 - nnz - array containing the number of block nonzeros in the various block rows 1331 (possibly different for each block row) or PETSC_NULL 1332 1333 Options Database Keys: 1334 . -mat_no_unroll - uses code that does not unroll the loops in the 1335 block calculations (much slower) 1336 . -mat_block_size - size of the blocks to use 1337 1338 Level: intermediate 1339 1340 Notes: 1341 The block AIJ format is compatible with standard Fortran 77 1342 storage. That is, the stored row and column indices can begin at 1343 either one (as in Fortran) or zero. See the users' manual for details. 1344 1345 Specify the preallocated storage with either nz or nnz (not both). 1346 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1347 allocation. For additional details, see the users manual chapter on 1348 matrices. 1349 1350 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1351 @*/ 1352 int MatSeqSBAIJSetNonzeroStructure(Mat B,int bs,int nz,int *nnz) 1353 { 1354 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1355 int i,len,ierr,mbs,bs2,size; 1356 PetscTruth flg; 1357 int s_nz; 1358 1359 PetscFunctionBegin; 1360 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1361 mbs = B->m/bs; 1362 bs2 = bs*bs; 1363 1364 if (mbs*bs != B->m) { 1365 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1366 } 1367 1368 if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than -2: value %d",nz); 1369 if (nnz) { 1370 for (i=0; i<mbs; i++) { 1371 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1372 } 1373 } 1374 1375 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1376 if (!flg) { 1377 switch (bs) { 1378 case 1: 1379 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1380 B->ops->solve = MatSolve_SeqSBAIJ_1; 1381 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1382 B->ops->mult = MatMult_SeqSBAIJ_1; 1383 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1384 break; 1385 case 2: 1386 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1387 B->ops->solve = MatSolve_SeqSBAIJ_2; 1388 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1389 B->ops->mult = MatMult_SeqSBAIJ_2; 1390 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1391 break; 1392 case 3: 1393 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1394 B->ops->solve = MatSolve_SeqSBAIJ_3; 1395 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1396 B->ops->mult = MatMult_SeqSBAIJ_3; 1397 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1398 break; 1399 case 4: 1400 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1401 B->ops->solve = MatSolve_SeqSBAIJ_4; 1402 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1403 B->ops->mult = MatMult_SeqSBAIJ_4; 1404 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1405 break; 1406 case 5: 1407 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1408 B->ops->solve = MatSolve_SeqSBAIJ_5; 1409 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1410 B->ops->mult = MatMult_SeqSBAIJ_5; 1411 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1412 break; 1413 case 6: 1414 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1415 B->ops->solve = MatSolve_SeqSBAIJ_6; 1416 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1417 B->ops->mult = MatMult_SeqSBAIJ_6; 1418 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1419 break; 1420 case 7: 1421 B->ops->mult = MatMult_SeqSBAIJ_7; 1422 B->ops->solve = MatSolve_SeqSBAIJ_7; 1423 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1424 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1425 break; 1426 } 1427 } 1428 1429 b->mbs = mbs; 1430 b->nbs = mbs; 1431 b->imax = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(b->imax); 1432 if (!nnz) { 1433 if (nz == PETSC_DEFAULT) nz = 5; 1434 else if (nz <= 0) nz = 1; 1435 for (i=0; i<mbs; i++) { 1436 b->imax[i] = nz; 1437 } 1438 nz = nz*mbs; /* total nz */ 1439 } else { 1440 nz = 0; 1441 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1442 } 1443 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1444 s_nz = nz; 1445 1446 /* allocate the matrix space */ 1447 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1448 b->a = (MatScalar*)PetscMalloc(len);CHKPTRQ(b->a); 1449 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1450 b->j = (int*)(b->a + s_nz*bs2); 1451 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1452 b->i = b->j + s_nz; 1453 b->singlemalloc = PETSC_TRUE; 1454 1455 /* pointer to beginning of each row */ 1456 b->i[0] = 0; 1457 for (i=1; i<mbs+1; i++) { 1458 b->i[i] = b->i[i-1] + b->imax[i-1]; 1459 } 1460 1461 /* b->ilen will count nonzeros in each block row so far. */ 1462 b->ilen = (int*)PetscMalloc((mbs+1)*sizeof(int)); 1463 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1464 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1465 1466 b->bs = bs; 1467 b->bs2 = bs2; 1468 b->s_nz = 0; 1469 b->s_maxnz = s_nz*bs2; 1470 1471 b->inew = 0; 1472 b->jnew = 0; 1473 b->anew = 0; 1474 b->a2anew = 0; 1475 b->permute = PETSC_FALSE; 1476 PetscFunctionReturn(0); 1477 } 1478 1479 1480 #undef __FUNC__ 1481 #define __FUNC__ "MatCreateSeqSBAIJ" 1482 /*@C 1483 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1484 compressed row) format. For good matrix assembly performance the 1485 user should preallocate the matrix storage by setting the parameter nz 1486 (or the array nnz). By setting these parameters accurately, performance 1487 during matrix assembly can be increased by more than a factor of 50. 1488 1489 Collective on MPI_Comm 1490 1491 Input Parameters: 1492 + comm - MPI communicator, set to PETSC_COMM_SELF 1493 . bs - size of block 1494 . m - number of rows, or number of columns 1495 . nz - number of block nonzeros per block row (same for all rows) 1496 - nnz - array containing the number of block nonzeros in the various block rows 1497 (possibly different for each block row) or PETSC_NULL 1498 1499 Output Parameter: 1500 . A - the symmetric matrix 1501 1502 Options Database Keys: 1503 . -mat_no_unroll - uses code that does not unroll the loops in the 1504 block calculations (much slower) 1505 . -mat_block_size - size of the blocks to use 1506 1507 Level: intermediate 1508 1509 Notes: 1510 The block AIJ format is compatible with standard Fortran 77 1511 storage. That is, the stored row and column indices can begin at 1512 either one (as in Fortran) or zero. See the users' manual for details. 1513 1514 Specify the preallocated storage with either nz or nnz (not both). 1515 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1516 allocation. For additional details, see the users manual chapter on 1517 matrices. 1518 1519 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1520 @*/ 1521 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1522 { 1523 int ierr; 1524 1525 PetscFunctionBegin; 1526 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1527 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1528 ierr = MatSeqSBAIJSetNonzeroStructure(*A,bs,nz,nnz);CHKERRQ(ierr); 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNC__ 1533 #define __FUNC__ "MatDuplicate_SeqSBAIJ" 1534 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1535 { 1536 Mat C; 1537 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1538 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1539 1540 PetscFunctionBegin; 1541 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1542 1543 *B = 0; 1544 ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&C);CHKERRQ(ierr); 1545 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1546 C->data = (void*)(c = PetscNew(Mat_SeqSBAIJ));CHKPTRQ(c); 1547 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1548 C->ops->destroy = MatDestroy_SeqSBAIJ; 1549 C->ops->view = MatView_SeqSBAIJ; 1550 C->factor = A->factor; 1551 c->row = 0; 1552 c->icol = 0; 1553 c->saved_values = 0; 1554 c->keepzeroedrows = a->keepzeroedrows; 1555 C->assembled = PETSC_TRUE; 1556 1557 c->bs = a->bs; 1558 c->bs2 = a->bs2; 1559 c->mbs = a->mbs; 1560 c->nbs = a->nbs; 1561 1562 c->imax = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax); 1563 c->ilen = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen); 1564 for (i=0; i<mbs; i++) { 1565 c->imax[i] = a->imax[i]; 1566 c->ilen[i] = a->ilen[i]; 1567 } 1568 1569 /* allocate the matrix space */ 1570 c->singlemalloc = PETSC_TRUE; 1571 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1572 c->a = (MatScalar*)PetscMalloc(len);CHKPTRQ(c->a); 1573 c->j = (int*)(c->a + nz*bs2); 1574 c->i = c->j + nz; 1575 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1576 if (mbs > 0) { 1577 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1578 if (cpvalues == MAT_COPY_VALUES) { 1579 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1580 } else { 1581 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1582 } 1583 } 1584 1585 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1586 c->sorted = a->sorted; 1587 c->roworiented = a->roworiented; 1588 c->nonew = a->nonew; 1589 1590 if (a->diag) { 1591 c->diag = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->diag); 1592 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1593 for (i=0; i<mbs; i++) { 1594 c->diag[i] = a->diag[i]; 1595 } 1596 } else c->diag = 0; 1597 c->s_nz = a->s_nz; 1598 c->s_maxnz = a->s_maxnz; 1599 c->solve_work = 0; 1600 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1601 c->mult_work = 0; 1602 *B = C; 1603 ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1604 PetscFunctionReturn(0); 1605 } 1606 1607 EXTERN_C_BEGIN 1608 #undef __FUNC__ 1609 #define __FUNC__ "MatLoad_SeqSBAIJ" 1610 int MatLoad_SeqSBAIJ(Viewer viewer,MatType type,Mat *A) 1611 { 1612 Mat_SeqSBAIJ *a; 1613 Mat B; 1614 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1615 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,*s_browlengths,maskcount; 1616 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1617 int *masked,nmask,tmp,bs2,ishift; 1618 Scalar *aa; 1619 MPI_Comm comm = ((PetscObject)viewer)->comm; 1620 1621 PetscFunctionBegin; 1622 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1623 bs2 = bs*bs; 1624 1625 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1626 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1627 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1628 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1629 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1630 M = header[1]; N = header[2]; nz = header[3]; 1631 1632 if (header[3] < 0) { 1633 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1634 } 1635 1636 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1637 1638 /* 1639 This code adds extra rows to make sure the number of rows is 1640 divisible by the blocksize 1641 */ 1642 mbs = M/bs; 1643 extra_rows = bs - M + bs*(mbs); 1644 if (extra_rows == bs) extra_rows = 0; 1645 else mbs++; 1646 if (extra_rows) { 1647 PLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1648 } 1649 1650 /* read in row lengths */ 1651 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1652 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1653 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1654 1655 /* read in column indices */ 1656 jj = (int*)PetscMalloc((nz+extra_rows)*sizeof(int));CHKPTRQ(jj); 1657 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1658 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1659 1660 /* loop over row lengths determining block row lengths */ 1661 browlengths = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1662 s_browlengths = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(s_browlengths); 1663 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1664 mask = (int*)PetscMalloc(2*mbs*sizeof(int));CHKPTRQ(mask); 1665 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1666 masked = mask + mbs; 1667 rowcount = 0; nzcount = 0; 1668 for (i=0; i<mbs; i++) { 1669 nmask = 0; 1670 for (j=0; j<bs; j++) { 1671 kmax = rowlengths[rowcount]; 1672 for (k=0; k<kmax; k++) { 1673 tmp = jj[nzcount++]/bs; /* block col. index */ 1674 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1675 } 1676 rowcount++; 1677 } 1678 s_browlengths[i] += nmask; 1679 browlengths[i] = 2*s_browlengths[i]; 1680 1681 /* zero out the mask elements we set */ 1682 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1683 } 1684 1685 /* create our matrix */ 1686 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1687 B = *A; 1688 a = (Mat_SeqSBAIJ*)B->data; 1689 1690 /* set matrix "i" values */ 1691 a->i[0] = 0; 1692 for (i=1; i<= mbs; i++) { 1693 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1694 a->ilen[i-1] = s_browlengths[i-1]; 1695 } 1696 a->s_nz = 0; 1697 for (i=0; i<mbs; i++) a->s_nz += s_browlengths[i]; 1698 1699 /* read in nonzero values */ 1700 aa = (Scalar*)PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1701 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1702 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1703 1704 /* set "a" and "j" values into matrix */ 1705 nzcount = 0; jcount = 0; 1706 for (i=0; i<mbs; i++) { 1707 nzcountb = nzcount; 1708 nmask = 0; 1709 for (j=0; j<bs; j++) { 1710 kmax = rowlengths[i*bs+j]; 1711 for (k=0; k<kmax; k++) { 1712 tmp = jj[nzcount++]/bs; /* block col. index */ 1713 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1714 } 1715 rowcount++; 1716 } 1717 /* sort the masked values */ 1718 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1719 1720 /* set "j" values into matrix */ 1721 maskcount = 1; 1722 for (j=0; j<nmask; j++) { 1723 a->j[jcount++] = masked[j]; 1724 mask[masked[j]] = maskcount++; 1725 } 1726 1727 /* set "a" values into matrix */ 1728 ishift = bs2*a->i[i]; 1729 for (j=0; j<bs; j++) { 1730 kmax = rowlengths[i*bs+j]; 1731 for (k=0; k<kmax; k++) { 1732 tmp = jj[nzcountb]/bs ; /* block col. index */ 1733 if (tmp >= i){ 1734 block = mask[tmp] - 1; 1735 point = jj[nzcountb] - bs*tmp; 1736 idx = ishift + bs2*block + j + bs*point; 1737 a->a[idx] = aa[nzcountb]; 1738 } 1739 nzcountb++; 1740 } 1741 } 1742 /* zero out the mask elements we set */ 1743 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1744 } 1745 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1746 1747 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1748 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1749 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1750 ierr = PetscFree(aa);CHKERRQ(ierr); 1751 ierr = PetscFree(jj);CHKERRQ(ierr); 1752 ierr = PetscFree(mask);CHKERRQ(ierr); 1753 1754 B->assembled = PETSC_TRUE; 1755 ierr = MatView_Private(B);CHKERRQ(ierr); 1756 PetscFunctionReturn(0); 1757 } 1758 EXTERN_C_END 1759 1760 1761 1762