1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int s_nz = a->i[n]; 63 for (i=0; i<s_nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int s_nz = a->i[n]-1; 84 for (i=0; i<s_nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 int MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 int ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 129 130 if (a->inew){ 131 ierr = PetscFree(a->inew);CHKERRQ(ierr); 132 a->inew = 0; 133 } 134 ierr = PetscFree(a);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 141 { 142 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 143 144 PetscFunctionBegin; 145 switch (op) { 146 case MAT_ROW_ORIENTED: 147 a->roworiented = PETSC_TRUE; 148 break; 149 case MAT_COLUMN_ORIENTED: 150 a->roworiented = PETSC_FALSE; 151 break; 152 case MAT_COLUMNS_SORTED: 153 a->sorted = PETSC_TRUE; 154 break; 155 case MAT_COLUMNS_UNSORTED: 156 a->sorted = PETSC_FALSE; 157 break; 158 case MAT_KEEP_ZEROED_ROWS: 159 a->keepzeroedrows = PETSC_TRUE; 160 break; 161 case MAT_NO_NEW_NONZERO_LOCATIONS: 162 a->nonew = 1; 163 break; 164 case MAT_NEW_NONZERO_LOCATION_ERR: 165 a->nonew = -1; 166 break; 167 case MAT_NEW_NONZERO_ALLOCATION_ERR: 168 a->nonew = -2; 169 break; 170 case MAT_YES_NEW_NONZERO_LOCATIONS: 171 a->nonew = 0; 172 break; 173 case MAT_ROWS_SORTED: 174 case MAT_ROWS_UNSORTED: 175 case MAT_YES_NEW_DIAGONALS: 176 case MAT_IGNORE_OFF_PROC_ENTRIES: 177 case MAT_USE_HASH_TABLE: 178 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 179 break; 180 case MAT_NO_NEW_DIAGONALS: 181 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 182 case MAT_SYMMETRIC: 183 case MAT_STRUCTURALLY_SYMMETRIC: 184 break; 185 default: 186 SETERRQ(PETSC_ERR_SUP,"unknown option"); 187 } 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 193 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 194 { 195 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 196 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 197 MatScalar *aa,*aa_i; 198 PetscScalar *v_i; 199 200 PetscFunctionBegin; 201 bs = a->bs; 202 ai = a->i; 203 aj = a->j; 204 aa = a->a; 205 bs2 = a->bs2; 206 207 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row); 208 209 bn = row/bs; /* Block number */ 210 bp = row % bs; /* Block position */ 211 M = ai[bn+1] - ai[bn]; 212 *ncols = bs*M; 213 214 if (v) { 215 *v = 0; 216 if (*ncols) { 217 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 218 for (i=0; i<M; i++) { /* for each block in the block row */ 219 v_i = *v + i*bs; 220 aa_i = aa + bs2*(ai[bn] + i); 221 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 222 } 223 } 224 } 225 226 if (cols) { 227 *cols = 0; 228 if (*ncols) { 229 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 230 for (i=0; i<M; i++) { /* for each block in the block row */ 231 cols_i = *cols + i*bs; 232 itmp = bs*aj[ai[bn] + i]; 233 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 234 } 235 } 236 } 237 238 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 239 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 240 #ifdef column_search 241 v_i = *v + M*bs; 242 cols_i = *cols + M*bs; 243 for (i=0; i<bn; i++){ /* for each block row */ 244 M = ai[i+1] - ai[i]; 245 for (j=0; j<M; j++){ 246 itmp = aj[ai[i] + j]; /* block column value */ 247 if (itmp == bn){ 248 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 249 for (k=0; k<bs; k++) { 250 *cols_i++ = i*bs+k; 251 *v_i++ = aa_i[k]; 252 } 253 *ncols += bs; 254 break; 255 } 256 } 257 } 258 #endif 259 260 PetscFunctionReturn(0); 261 } 262 263 #undef __FUNCT__ 264 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 265 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 266 { 267 int ierr; 268 269 PetscFunctionBegin; 270 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 271 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 272 PetscFunctionReturn(0); 273 } 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 277 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 278 { 279 int ierr; 280 PetscFunctionBegin; 281 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 282 PetscFunctionReturn(0); 283 } 284 285 #undef __FUNCT__ 286 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 287 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 288 { 289 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 290 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 291 PetscScalar *aa; 292 FILE *file; 293 294 PetscFunctionBegin; 295 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 296 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 297 col_lens[0] = MAT_FILE_COOKIE; 298 299 col_lens[1] = A->m; 300 col_lens[2] = A->m; 301 col_lens[3] = a->s_nz*bs2; 302 303 /* store lengths of each row and write (including header) to file */ 304 count = 0; 305 for (i=0; i<a->mbs; i++) { 306 for (j=0; j<bs; j++) { 307 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 308 } 309 } 310 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 311 ierr = PetscFree(col_lens);CHKERRQ(ierr); 312 313 /* store column indices (zero start index) */ 314 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 315 count = 0; 316 for (i=0; i<a->mbs; i++) { 317 for (j=0; j<bs; j++) { 318 for (k=a->i[i]; k<a->i[i+1]; k++) { 319 for (l=0; l<bs; l++) { 320 jj[count++] = bs*a->j[k] + l; 321 } 322 } 323 } 324 } 325 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 326 ierr = PetscFree(jj);CHKERRQ(ierr); 327 328 /* store nonzero values */ 329 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 330 count = 0; 331 for (i=0; i<a->mbs; i++) { 332 for (j=0; j<bs; j++) { 333 for (k=a->i[i]; k<a->i[i+1]; k++) { 334 for (l=0; l<bs; l++) { 335 aa[count++] = a->a[bs2*k + l*bs + j]; 336 } 337 } 338 } 339 } 340 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 341 ierr = PetscFree(aa);CHKERRQ(ierr); 342 343 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 344 if (file) { 345 fprintf(file,"-matload_block_size %d\n",a->bs); 346 } 347 PetscFunctionReturn(0); 348 } 349 350 #undef __FUNCT__ 351 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 352 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 353 { 354 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 355 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 356 char *name; 357 PetscViewerFormat format; 358 359 PetscFunctionBegin; 360 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 361 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 362 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 363 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 364 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 365 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 366 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 367 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 368 for (i=0; i<a->mbs; i++) { 369 for (j=0; j<bs; j++) { 370 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 371 for (k=a->i[i]; k<a->i[i+1]; k++) { 372 for (l=0; l<bs; l++) { 373 #if defined(PETSC_USE_COMPLEX) 374 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 375 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 376 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 377 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 378 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 379 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 380 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 381 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 382 } 383 #else 384 if (a->a[bs2*k + l*bs + j] != 0.0) { 385 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 386 } 387 #endif 388 } 389 } 390 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 391 } 392 } 393 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 394 } else { 395 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 396 for (i=0; i<a->mbs; i++) { 397 for (j=0; j<bs; j++) { 398 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 399 for (k=a->i[i]; k<a->i[i+1]; k++) { 400 for (l=0; l<bs; l++) { 401 #if defined(PETSC_USE_COMPLEX) 402 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 403 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 404 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 405 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 406 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 407 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 408 } else { 409 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 410 } 411 #else 412 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 413 #endif 414 } 415 } 416 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 417 } 418 } 419 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 420 } 421 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 422 PetscFunctionReturn(0); 423 } 424 425 #undef __FUNCT__ 426 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 427 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 428 { 429 Mat A = (Mat) Aa; 430 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 431 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 432 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 433 MatScalar *aa; 434 MPI_Comm comm; 435 PetscViewer viewer; 436 437 PetscFunctionBegin; 438 /* 439 This is nasty. If this is called from an originally parallel matrix 440 then all processes call this,but only the first has the matrix so the 441 rest should return immediately. 442 */ 443 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 444 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 445 if (rank) PetscFunctionReturn(0); 446 447 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 448 449 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 450 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 451 452 /* loop over matrix elements drawing boxes */ 453 color = PETSC_DRAW_BLUE; 454 for (i=0,row=0; i<mbs; i++,row+=bs) { 455 for (j=a->i[i]; j<a->i[i+1]; j++) { 456 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 457 x_l = a->j[j]*bs; x_r = x_l + 1.0; 458 aa = a->a + j*bs2; 459 for (k=0; k<bs; k++) { 460 for (l=0; l<bs; l++) { 461 if (PetscRealPart(*aa++) >= 0.) continue; 462 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 463 } 464 } 465 } 466 } 467 color = PETSC_DRAW_CYAN; 468 for (i=0,row=0; i<mbs; i++,row+=bs) { 469 for (j=a->i[i]; j<a->i[i+1]; j++) { 470 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 471 x_l = a->j[j]*bs; x_r = x_l + 1.0; 472 aa = a->a + j*bs2; 473 for (k=0; k<bs; k++) { 474 for (l=0; l<bs; l++) { 475 if (PetscRealPart(*aa++) != 0.) continue; 476 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 477 } 478 } 479 } 480 } 481 482 color = PETSC_DRAW_RED; 483 for (i=0,row=0; i<mbs; i++,row+=bs) { 484 for (j=a->i[i]; j<a->i[i+1]; j++) { 485 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 486 x_l = a->j[j]*bs; x_r = x_l + 1.0; 487 aa = a->a + j*bs2; 488 for (k=0; k<bs; k++) { 489 for (l=0; l<bs; l++) { 490 if (PetscRealPart(*aa++) <= 0.) continue; 491 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 492 } 493 } 494 } 495 } 496 PetscFunctionReturn(0); 497 } 498 499 #undef __FUNCT__ 500 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 501 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 502 { 503 int ierr; 504 PetscReal xl,yl,xr,yr,w,h; 505 PetscDraw draw; 506 PetscTruth isnull; 507 508 PetscFunctionBegin; 509 510 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 511 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 512 513 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 514 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 515 xr += w; yr += h; xl = -w; yl = -h; 516 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 517 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 518 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 522 #undef __FUNCT__ 523 #define __FUNCT__ "MatView_SeqSBAIJ" 524 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 525 { 526 int ierr; 527 PetscTruth issocket,isascii,isbinary,isdraw; 528 529 PetscFunctionBegin; 530 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 531 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 532 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 533 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 534 if (issocket) { 535 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 536 } else if (isascii){ 537 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 538 } else if (isbinary) { 539 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 540 } else if (isdraw) { 541 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 542 } else { 543 SETERRQ1(1,"Viewer type %s not supported by SeqSBAIJ matrices",((PetscObject)viewer)->type_name); 544 } 545 PetscFunctionReturn(0); 546 } 547 548 549 #undef __FUNCT__ 550 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 551 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 552 { 553 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 554 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 555 int *ai = a->i,*ailen = a->ilen; 556 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 557 MatScalar *ap,*aa = a->a,zero = 0.0; 558 559 PetscFunctionBegin; 560 for (k=0; k<m; k++) { /* loop over rows */ 561 row = im[k]; brow = row/bs; 562 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row); 563 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 564 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 565 nrow = ailen[brow]; 566 for (l=0; l<n; l++) { /* loop over columns */ 567 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]); 568 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 569 col = in[l] ; 570 bcol = col/bs; 571 cidx = col%bs; 572 ridx = row%bs; 573 high = nrow; 574 low = 0; /* assume unsorted */ 575 while (high-low > 5) { 576 t = (low+high)/2; 577 if (rp[t] > bcol) high = t; 578 else low = t; 579 } 580 for (i=low; i<high; i++) { 581 if (rp[i] > bcol) break; 582 if (rp[i] == bcol) { 583 *v++ = ap[bs2*i+bs*cidx+ridx]; 584 goto finished; 585 } 586 } 587 *v++ = zero; 588 finished:; 589 } 590 } 591 PetscFunctionReturn(0); 592 } 593 594 595 #undef __FUNCT__ 596 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 597 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 598 { 599 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 600 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 601 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 602 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 603 PetscTruth roworiented=a->roworiented; 604 const MatScalar *value = v; 605 MatScalar *ap,*aa = a->a,*bap; 606 607 PetscFunctionBegin; 608 if (roworiented) { 609 stepval = (n-1)*bs; 610 } else { 611 stepval = (m-1)*bs; 612 } 613 for (k=0; k<m; k++) { /* loop over added rows */ 614 row = im[k]; 615 if (row < 0) continue; 616 #if defined(PETSC_USE_BOPT_g) 617 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 618 #endif 619 rp = aj + ai[row]; 620 ap = aa + bs2*ai[row]; 621 rmax = imax[row]; 622 nrow = ailen[row]; 623 low = 0; 624 for (l=0; l<n; l++) { /* loop over added columns */ 625 if (in[l] < 0) continue; 626 col = in[l]; 627 #if defined(PETSC_USE_BOPT_g) 628 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1); 629 #endif 630 if (col < row) continue; /* ignore lower triangular block */ 631 if (roworiented) { 632 value = v + k*(stepval+bs)*bs + l*bs; 633 } else { 634 value = v + l*(stepval+bs)*bs + k*bs; 635 } 636 if (!sorted) low = 0; high = nrow; 637 while (high-low > 7) { 638 t = (low+high)/2; 639 if (rp[t] > col) high = t; 640 else low = t; 641 } 642 for (i=low; i<high; i++) { 643 if (rp[i] > col) break; 644 if (rp[i] == col) { 645 bap = ap + bs2*i; 646 if (roworiented) { 647 if (is == ADD_VALUES) { 648 for (ii=0; ii<bs; ii++,value+=stepval) { 649 for (jj=ii; jj<bs2; jj+=bs) { 650 bap[jj] += *value++; 651 } 652 } 653 } else { 654 for (ii=0; ii<bs; ii++,value+=stepval) { 655 for (jj=ii; jj<bs2; jj+=bs) { 656 bap[jj] = *value++; 657 } 658 } 659 } 660 } else { 661 if (is == ADD_VALUES) { 662 for (ii=0; ii<bs; ii++,value+=stepval) { 663 for (jj=0; jj<bs; jj++) { 664 *bap++ += *value++; 665 } 666 } 667 } else { 668 for (ii=0; ii<bs; ii++,value+=stepval) { 669 for (jj=0; jj<bs; jj++) { 670 *bap++ = *value++; 671 } 672 } 673 } 674 } 675 goto noinsert2; 676 } 677 } 678 if (nonew == 1) goto noinsert2; 679 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 680 if (nrow >= rmax) { 681 /* there is no extra room in row, therefore enlarge */ 682 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 683 MatScalar *new_a; 684 685 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 686 687 /* malloc new storage space */ 688 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 689 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 690 new_j = (int*)(new_a + bs2*new_nz); 691 new_i = new_j + new_nz; 692 693 /* copy over old data into new slots */ 694 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 695 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 696 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 697 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 698 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 699 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 700 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 701 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 702 /* free up old matrix storage */ 703 ierr = PetscFree(a->a);CHKERRQ(ierr); 704 if (!a->singlemalloc) { 705 ierr = PetscFree(a->i);CHKERRQ(ierr); 706 ierr = PetscFree(a->j);CHKERRQ(ierr); 707 } 708 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 709 a->singlemalloc = PETSC_TRUE; 710 711 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 712 rmax = imax[row] = imax[row] + CHUNKSIZE; 713 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 714 a->s_maxnz += bs2*CHUNKSIZE; 715 a->reallocs++; 716 a->s_nz++; 717 } 718 N = nrow++ - 1; 719 /* shift up all the later entries in this row */ 720 for (ii=N; ii>=i; ii--) { 721 rp[ii+1] = rp[ii]; 722 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 723 } 724 if (N >= i) { 725 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 726 } 727 rp[i] = col; 728 bap = ap + bs2*i; 729 if (roworiented) { 730 for (ii=0; ii<bs; ii++,value+=stepval) { 731 for (jj=ii; jj<bs2; jj+=bs) { 732 bap[jj] = *value++; 733 } 734 } 735 } else { 736 for (ii=0; ii<bs; ii++,value+=stepval) { 737 for (jj=0; jj<bs; jj++) { 738 *bap++ = *value++; 739 } 740 } 741 } 742 noinsert2:; 743 low = i; 744 } 745 ailen[row] = nrow; 746 } 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNCT__ 751 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 752 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 753 { 754 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 755 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 756 int m = A->m,*ip,N,*ailen = a->ilen; 757 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 758 MatScalar *aa = a->a,*ap; 759 760 PetscFunctionBegin; 761 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 762 763 if (m) rmax = ailen[0]; 764 for (i=1; i<mbs; i++) { 765 /* move each row back by the amount of empty slots (fshift) before it*/ 766 fshift += imax[i-1] - ailen[i-1]; 767 rmax = PetscMax(rmax,ailen[i]); 768 if (fshift) { 769 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 770 N = ailen[i]; 771 for (j=0; j<N; j++) { 772 ip[j-fshift] = ip[j]; 773 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 774 } 775 } 776 ai[i] = ai[i-1] + ailen[i-1]; 777 } 778 if (mbs) { 779 fshift += imax[mbs-1] - ailen[mbs-1]; 780 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 781 } 782 /* reset ilen and imax for each row */ 783 for (i=0; i<mbs; i++) { 784 ailen[i] = imax[i] = ai[i+1] - ai[i]; 785 } 786 a->s_nz = ai[mbs]; 787 788 /* diagonals may have moved, reset it */ 789 if (a->diag) { 790 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 791 } 792 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 793 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 794 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 795 a->reallocs); 796 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 797 a->reallocs = 0; 798 A->info.nz_unneeded = (PetscReal)fshift*bs2; 799 800 PetscFunctionReturn(0); 801 } 802 803 /* 804 This function returns an array of flags which indicate the locations of contiguous 805 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 806 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 807 Assume: sizes should be long enough to hold all the values. 808 */ 809 #undef __FUNCT__ 810 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 811 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 812 { 813 int i,j,k,row; 814 PetscTruth flg; 815 816 PetscFunctionBegin; 817 for (i=0,j=0; i<n; j++) { 818 row = idx[i]; 819 if (row%bs!=0) { /* Not the begining of a block */ 820 sizes[j] = 1; 821 i++; 822 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 823 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 824 i++; 825 } else { /* Begining of the block, so check if the complete block exists */ 826 flg = PETSC_TRUE; 827 for (k=1; k<bs; k++) { 828 if (row+k != idx[i+k]) { /* break in the block */ 829 flg = PETSC_FALSE; 830 break; 831 } 832 } 833 if (flg == PETSC_TRUE) { /* No break in the bs */ 834 sizes[j] = bs; 835 i+= bs; 836 } else { 837 sizes[j] = 1; 838 i++; 839 } 840 } 841 } 842 *bs_max = j; 843 PetscFunctionReturn(0); 844 } 845 846 #undef __FUNCT__ 847 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 848 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag) 849 { 850 PetscFunctionBegin; 851 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 852 } 853 854 /* Only add/insert a(i,j) with i<=j (blocks). 855 Any a(i,j) with i>j input by user is ingored. 856 */ 857 858 #undef __FUNCT__ 859 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 860 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 861 { 862 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 863 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 864 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 865 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 866 int ridx,cidx,bs2=a->bs2,ierr; 867 MatScalar *ap,value,*aa=a->a,*bap; 868 869 PetscFunctionBegin; 870 871 for (k=0; k<m; k++) { /* loop over added rows */ 872 row = im[k]; /* row number */ 873 brow = row/bs; /* block row number */ 874 if (row < 0) continue; 875 #if defined(PETSC_USE_BOPT_g) 876 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 877 #endif 878 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 879 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 880 rmax = imax[brow]; /* maximum space allocated for this row */ 881 nrow = ailen[brow]; /* actual length of this row */ 882 low = 0; 883 884 for (l=0; l<n; l++) { /* loop over added columns */ 885 if (in[l] < 0) continue; 886 #if defined(PETSC_USE_BOPT_g) 887 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1); 888 #endif 889 col = in[l]; 890 bcol = col/bs; /* block col number */ 891 892 if (brow <= bcol){ 893 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 894 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 895 /* element value a(k,l) */ 896 if (roworiented) { 897 value = v[l + k*n]; 898 } else { 899 value = v[k + l*m]; 900 } 901 902 /* move pointer bap to a(k,l) quickly and add/insert value */ 903 if (!sorted) low = 0; high = nrow; 904 while (high-low > 7) { 905 t = (low+high)/2; 906 if (rp[t] > bcol) high = t; 907 else low = t; 908 } 909 for (i=low; i<high; i++) { 910 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 911 if (rp[i] > bcol) break; 912 if (rp[i] == bcol) { 913 bap = ap + bs2*i + bs*cidx + ridx; 914 if (is == ADD_VALUES) *bap += value; 915 else *bap = value; 916 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 917 if (brow == bcol && ridx < cidx){ 918 bap = ap + bs2*i + bs*ridx + cidx; 919 if (is == ADD_VALUES) *bap += value; 920 else *bap = value; 921 } 922 goto noinsert1; 923 } 924 } 925 926 if (nonew == 1) goto noinsert1; 927 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 928 if (nrow >= rmax) { 929 /* there is no extra room in row, therefore enlarge */ 930 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 931 MatScalar *new_a; 932 933 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 934 935 /* Malloc new storage space */ 936 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 937 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 938 new_j = (int*)(new_a + bs2*new_nz); 939 new_i = new_j + new_nz; 940 941 /* copy over old data into new slots */ 942 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 943 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 944 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 945 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 946 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 947 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 948 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 949 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 950 /* free up old matrix storage */ 951 ierr = PetscFree(a->a);CHKERRQ(ierr); 952 if (!a->singlemalloc) { 953 ierr = PetscFree(a->i);CHKERRQ(ierr); 954 ierr = PetscFree(a->j);CHKERRQ(ierr); 955 } 956 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 957 a->singlemalloc = PETSC_TRUE; 958 959 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 960 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 961 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 962 a->s_maxnz += bs2*CHUNKSIZE; 963 a->reallocs++; 964 a->s_nz++; 965 } 966 967 N = nrow++ - 1; 968 /* shift up all the later entries in this row */ 969 for (ii=N; ii>=i; ii--) { 970 rp[ii+1] = rp[ii]; 971 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 972 } 973 if (N>=i) { 974 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 975 } 976 rp[i] = bcol; 977 ap[bs2*i + bs*cidx + ridx] = value; 978 noinsert1:; 979 low = i; 980 /* } */ 981 } 982 } /* end of if .. if.. */ 983 } /* end of loop over added columns */ 984 ailen[brow] = nrow; 985 } /* end of loop over added rows */ 986 987 PetscFunctionReturn(0); 988 } 989 990 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 991 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 992 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int); 993 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 994 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]); 995 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 996 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 997 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat); 998 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 999 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 1000 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 1001 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 1002 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 1003 extern int MatZeroEntries_SeqSBAIJ(Mat); 1004 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 1005 1006 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 1007 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 1008 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 1009 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 1010 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 1011 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 1012 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 1013 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 1014 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 1015 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 1016 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 1017 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 1018 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 1019 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 1020 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 1021 1022 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 1023 1024 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 1025 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 1026 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 1027 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1028 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 1029 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 1030 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 1031 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 1032 1033 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 1034 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 1035 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 1036 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 1037 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 1038 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 1039 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 1041 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 1042 1043 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 1044 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 1045 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 1046 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 1047 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 1048 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 1049 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 1050 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 1051 1052 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 1053 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 1054 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1055 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1056 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1057 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1058 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1059 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1060 1061 #ifdef HAVE_MatICCFactor 1062 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1065 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1066 { 1067 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1068 Mat outA; 1069 int ierr; 1070 PetscTruth row_identity,col_identity; 1071 1072 PetscFunctionBegin; 1073 /* 1074 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1075 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1076 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1077 if (!row_identity || !col_identity) { 1078 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1079 } 1080 */ 1081 1082 outA = inA; 1083 inA->factor = FACTOR_CHOLESKY; 1084 1085 if (!a->diag) { 1086 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1087 } 1088 /* 1089 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1090 for ILU(0) factorization with natural ordering 1091 */ 1092 switch (a->bs) { 1093 case 1: 1094 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1095 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1096 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1097 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1098 case 2: 1099 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1100 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1101 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1102 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1103 break; 1104 case 3: 1105 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1106 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1107 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1108 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1109 break; 1110 case 4: 1111 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1112 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1113 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1114 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1115 break; 1116 case 5: 1117 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1118 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1119 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1120 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1121 break; 1122 case 6: 1123 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1124 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1125 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1126 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1127 break; 1128 case 7: 1129 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1130 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1131 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1132 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1133 break; 1134 default: 1135 a->row = row; 1136 a->icol = col; 1137 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1138 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1139 1140 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1141 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1142 PetscLogObjectParent(inA,a->icol); 1143 1144 if (!a->solve_work) { 1145 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1146 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1147 } 1148 } 1149 1150 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1151 1152 PetscFunctionReturn(0); 1153 } 1154 #endif 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1158 int MatPrintHelp_SeqSBAIJ(Mat A) 1159 { 1160 static PetscTruth called = PETSC_FALSE; 1161 MPI_Comm comm = A->comm; 1162 int ierr; 1163 1164 PetscFunctionBegin; 1165 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1166 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1167 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1168 PetscFunctionReturn(0); 1169 } 1170 1171 EXTERN_C_BEGIN 1172 #undef __FUNCT__ 1173 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1174 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1175 { 1176 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1177 int i,nz,n; 1178 1179 PetscFunctionBegin; 1180 nz = baij->s_maxnz; 1181 n = mat->n; 1182 for (i=0; i<nz; i++) { 1183 baij->j[i] = indices[i]; 1184 } 1185 baij->s_nz = nz; 1186 for (i=0; i<n; i++) { 1187 baij->ilen[i] = baij->imax[i]; 1188 } 1189 1190 PetscFunctionReturn(0); 1191 } 1192 EXTERN_C_END 1193 1194 #undef __FUNCT__ 1195 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1196 /*@ 1197 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1198 in the matrix. 1199 1200 Input Parameters: 1201 + mat - the SeqSBAIJ matrix 1202 - indices - the column indices 1203 1204 Level: advanced 1205 1206 Notes: 1207 This can be called if you have precomputed the nonzero structure of the 1208 matrix and want to provide it to the matrix object to improve the performance 1209 of the MatSetValues() operation. 1210 1211 You MUST have set the correct numbers of nonzeros per row in the call to 1212 MatCreateSeqSBAIJ(). 1213 1214 MUST be called before any calls to MatSetValues() 1215 1216 .seealso: MatCreateSeqSBAIJ 1217 @*/ 1218 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1219 { 1220 int ierr,(*f)(Mat,int *); 1221 1222 PetscFunctionBegin; 1223 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1224 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1225 if (f) { 1226 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1227 } else { 1228 SETERRQ(1,"Wrong type of matrix to set column indices"); 1229 } 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1235 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1236 { 1237 int ierr; 1238 1239 PetscFunctionBegin; 1240 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1246 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1247 { 1248 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1249 PetscFunctionBegin; 1250 *array = a->a; 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1256 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1257 { 1258 PetscFunctionBegin; 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #include "petscblaslapack.h" 1263 #undef __FUNCT__ 1264 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1265 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1266 { 1267 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1268 int ierr,one=1,i,bs=y->bs,bs2,j; 1269 1270 PetscFunctionBegin; 1271 if (str == SAME_NONZERO_PATTERN) { 1272 BLaxpy_(&x->s_nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1273 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1274 if (y->xtoy && y->XtoY != X) { 1275 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1276 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1277 } 1278 if (!y->xtoy) { /* get xtoy */ 1279 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1280 y->XtoY = X; 1281 } 1282 bs2 = bs*bs; 1283 for (i=0; i<x->s_nz; i++) { 1284 j = 0; 1285 while (j < bs2){ 1286 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1287 j++; 1288 } 1289 } 1290 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->s_nz,bs2*y->s_nz,(PetscReal)(bs2*x->s_nz)/(bs2*y->s_nz)); 1291 } else { 1292 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1293 } 1294 PetscFunctionReturn(0); 1295 } 1296 1297 #undef __FUNCT__ 1298 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1299 int MatIsSymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1300 { 1301 PetscFunctionBegin; 1302 *flg = PETSC_TRUE; 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1308 int MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1309 { 1310 PetscFunctionBegin; 1311 *flg = PETSC_TRUE; 1312 PetscFunctionReturn(0); 1313 } 1314 1315 #undef __FUNCT__ 1316 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1317 int MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1318 { 1319 PetscFunctionBegin; 1320 *flg = PETSC_FALSE; 1321 PetscFunctionReturn(0); 1322 } 1323 1324 /* -------------------------------------------------------------------*/ 1325 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1326 MatGetRow_SeqSBAIJ, 1327 MatRestoreRow_SeqSBAIJ, 1328 MatMult_SeqSBAIJ_N, 1329 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1330 MatMultTranspose_SeqSBAIJ, 1331 MatMultTransposeAdd_SeqSBAIJ, 1332 MatSolve_SeqSBAIJ_N, 1333 0, 1334 0, 1335 /*10*/ 0, 1336 0, 1337 MatCholeskyFactor_SeqSBAIJ, 1338 MatRelax_SeqSBAIJ, 1339 MatTranspose_SeqSBAIJ, 1340 /*15*/ MatGetInfo_SeqSBAIJ, 1341 MatEqual_SeqSBAIJ, 1342 MatGetDiagonal_SeqSBAIJ, 1343 MatDiagonalScale_SeqSBAIJ, 1344 MatNorm_SeqSBAIJ, 1345 /*20*/ 0, 1346 MatAssemblyEnd_SeqSBAIJ, 1347 0, 1348 MatSetOption_SeqSBAIJ, 1349 MatZeroEntries_SeqSBAIJ, 1350 /*25*/ MatZeroRows_SeqSBAIJ, 1351 0, 1352 0, 1353 MatCholeskyFactorSymbolic_SeqSBAIJ, 1354 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1355 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1356 0, 1357 MatICCFactorSymbolic_SeqSBAIJ, 1358 MatGetArray_SeqSBAIJ, 1359 MatRestoreArray_SeqSBAIJ, 1360 /*35*/ MatDuplicate_SeqSBAIJ, 1361 0, 1362 0, 1363 0, 1364 0, 1365 /*40*/ MatAXPY_SeqSBAIJ, 1366 MatGetSubMatrices_SeqSBAIJ, 1367 MatIncreaseOverlap_SeqSBAIJ, 1368 MatGetValues_SeqSBAIJ, 1369 0, 1370 /*45*/ MatPrintHelp_SeqSBAIJ, 1371 MatScale_SeqSBAIJ, 1372 0, 1373 0, 1374 0, 1375 /*50*/ MatGetBlockSize_SeqSBAIJ, 1376 MatGetRowIJ_SeqSBAIJ, 1377 MatRestoreRowIJ_SeqSBAIJ, 1378 0, 1379 0, 1380 /*55*/ 0, 1381 0, 1382 0, 1383 0, 1384 MatSetValuesBlocked_SeqSBAIJ, 1385 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1386 0, 1387 0, 1388 MatGetPetscMaps_Petsc, 1389 0, 1390 /*65*/ 0, 1391 0, 1392 0, 1393 0, 1394 0, 1395 /*70*/ MatGetRowMax_SeqSBAIJ, 1396 0, 1397 0, 1398 0, 1399 0, 1400 /*75*/ 0, 1401 0, 1402 0, 1403 0, 1404 0, 1405 /*80*/ 0, 1406 0, 1407 0, 1408 0, 1409 #if !defined(PETSC_USE_COMPLEX) 1410 MatGetInertia_SeqSBAIJ, 1411 #else 1412 0, 1413 #endif 1414 /*85*/ MatLoad_SeqSBAIJ, 1415 MatIsSymmetric_SeqSBAIJ, 1416 MatIsStructurallySymmetric_SeqSBAIJ, 1417 MatIsHermitian_SeqSBAIJ 1418 }; 1419 1420 EXTERN_C_BEGIN 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1423 int MatStoreValues_SeqSBAIJ(Mat mat) 1424 { 1425 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1426 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1427 int ierr; 1428 1429 PetscFunctionBegin; 1430 if (aij->nonew != 1) { 1431 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1432 } 1433 1434 /* allocate space for values if not already there */ 1435 if (!aij->saved_values) { 1436 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1437 } 1438 1439 /* copy values over */ 1440 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1441 PetscFunctionReturn(0); 1442 } 1443 EXTERN_C_END 1444 1445 EXTERN_C_BEGIN 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1448 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1449 { 1450 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1451 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1452 1453 PetscFunctionBegin; 1454 if (aij->nonew != 1) { 1455 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1456 } 1457 if (!aij->saved_values) { 1458 SETERRQ(1,"Must call MatStoreValues(A);first"); 1459 } 1460 1461 /* copy values over */ 1462 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1463 PetscFunctionReturn(0); 1464 } 1465 EXTERN_C_END 1466 1467 EXTERN_C_BEGIN 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1470 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz) 1471 { 1472 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1473 int i,len,ierr,mbs,bs2; 1474 PetscTruth flg; 1475 int s_nz; 1476 1477 PetscFunctionBegin; 1478 B->preallocated = PETSC_TRUE; 1479 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1480 mbs = B->m/bs; 1481 bs2 = bs*bs; 1482 1483 if (mbs*bs != B->m) { 1484 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1485 } 1486 1487 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1488 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1489 if (nnz) { 1490 for (i=0; i<mbs; i++) { 1491 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1492 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1493 } 1494 } 1495 1496 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1497 if (!flg) { 1498 switch (bs) { 1499 case 1: 1500 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1501 B->ops->solve = MatSolve_SeqSBAIJ_1; 1502 B->ops->solves = MatSolves_SeqSBAIJ_1; 1503 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1504 B->ops->mult = MatMult_SeqSBAIJ_1; 1505 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1506 break; 1507 case 2: 1508 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1509 B->ops->solve = MatSolve_SeqSBAIJ_2; 1510 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1511 B->ops->mult = MatMult_SeqSBAIJ_2; 1512 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1513 break; 1514 case 3: 1515 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1516 B->ops->solve = MatSolve_SeqSBAIJ_3; 1517 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1518 B->ops->mult = MatMult_SeqSBAIJ_3; 1519 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1520 break; 1521 case 4: 1522 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1523 B->ops->solve = MatSolve_SeqSBAIJ_4; 1524 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1525 B->ops->mult = MatMult_SeqSBAIJ_4; 1526 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1527 break; 1528 case 5: 1529 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1530 B->ops->solve = MatSolve_SeqSBAIJ_5; 1531 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1532 B->ops->mult = MatMult_SeqSBAIJ_5; 1533 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1534 break; 1535 case 6: 1536 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1537 B->ops->solve = MatSolve_SeqSBAIJ_6; 1538 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1539 B->ops->mult = MatMult_SeqSBAIJ_6; 1540 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1541 break; 1542 case 7: 1543 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1544 B->ops->solve = MatSolve_SeqSBAIJ_7; 1545 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1546 B->ops->mult = MatMult_SeqSBAIJ_7; 1547 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1548 break; 1549 } 1550 } 1551 1552 b->mbs = mbs; 1553 b->nbs = mbs; 1554 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1555 if (!nnz) { 1556 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1557 else if (nz <= 0) nz = 1; 1558 for (i=0; i<mbs; i++) { 1559 b->imax[i] = nz; 1560 } 1561 nz = nz*mbs; /* total nz */ 1562 } else { 1563 nz = 0; 1564 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1565 } 1566 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1567 s_nz = nz; 1568 1569 /* allocate the matrix space */ 1570 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1571 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1572 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1573 b->j = (int*)(b->a + s_nz*bs2); 1574 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1575 b->i = b->j + s_nz; 1576 b->singlemalloc = PETSC_TRUE; 1577 1578 /* pointer to beginning of each row */ 1579 b->i[0] = 0; 1580 for (i=1; i<mbs+1; i++) { 1581 b->i[i] = b->i[i-1] + b->imax[i-1]; 1582 } 1583 1584 /* b->ilen will count nonzeros in each block row so far. */ 1585 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1586 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1587 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1588 1589 b->bs = bs; 1590 b->bs2 = bs2; 1591 b->s_nz = 0; 1592 b->s_maxnz = s_nz*bs2; 1593 1594 b->inew = 0; 1595 b->jnew = 0; 1596 b->anew = 0; 1597 b->a2anew = 0; 1598 b->permute = PETSC_FALSE; 1599 PetscFunctionReturn(0); 1600 } 1601 EXTERN_C_END 1602 1603 /*MC 1604 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1605 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1606 1607 Options Database Keys: 1608 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1609 1610 Level: beginner 1611 1612 .seealso: MatCreateSeqSBAIJ 1613 M*/ 1614 1615 EXTERN_C_BEGIN 1616 #undef __FUNCT__ 1617 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1618 int MatCreate_SeqSBAIJ(Mat B) 1619 { 1620 Mat_SeqSBAIJ *b; 1621 int ierr,size; 1622 1623 PetscFunctionBegin; 1624 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1625 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1626 B->m = B->M = PetscMax(B->m,B->M); 1627 B->n = B->N = PetscMax(B->n,B->N); 1628 1629 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1630 B->data = (void*)b; 1631 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1632 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1633 B->ops->destroy = MatDestroy_SeqSBAIJ; 1634 B->ops->view = MatView_SeqSBAIJ; 1635 B->factor = 0; 1636 B->lupivotthreshold = 1.0; 1637 B->mapping = 0; 1638 b->row = 0; 1639 b->icol = 0; 1640 b->reallocs = 0; 1641 b->saved_values = 0; 1642 1643 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1644 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1645 1646 b->sorted = PETSC_FALSE; 1647 b->roworiented = PETSC_TRUE; 1648 b->nonew = 0; 1649 b->diag = 0; 1650 b->solve_work = 0; 1651 b->mult_work = 0; 1652 B->spptr = 0; 1653 b->keepzeroedrows = PETSC_FALSE; 1654 b->xtoy = 0; 1655 b->XtoY = 0; 1656 1657 b->inew = 0; 1658 b->jnew = 0; 1659 b->anew = 0; 1660 b->a2anew = 0; 1661 b->permute = PETSC_FALSE; 1662 1663 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1664 "MatStoreValues_SeqSBAIJ", 1665 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1666 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1667 "MatRetrieveValues_SeqSBAIJ", 1668 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1669 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1670 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1671 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1672 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1673 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1674 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1675 PetscFunctionReturn(0); 1676 } 1677 EXTERN_C_END 1678 1679 #undef __FUNCT__ 1680 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1681 /*@C 1682 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1683 compressed row) format. For good matrix assembly performance the 1684 user should preallocate the matrix storage by setting the parameter nz 1685 (or the array nnz). By setting these parameters accurately, performance 1686 during matrix assembly can be increased by more than a factor of 50. 1687 1688 Collective on Mat 1689 1690 Input Parameters: 1691 + A - the symmetric matrix 1692 . bs - size of block 1693 . nz - number of block nonzeros per block row (same for all rows) 1694 - nnz - array containing the number of block nonzeros in the upper triangular plus 1695 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1696 1697 Options Database Keys: 1698 . -mat_no_unroll - uses code that does not unroll the loops in the 1699 block calculations (much slower) 1700 . -mat_block_size - size of the blocks to use 1701 1702 Level: intermediate 1703 1704 Notes: 1705 Specify the preallocated storage with either nz or nnz (not both). 1706 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1707 allocation. For additional details, see the users manual chapter on 1708 matrices. 1709 1710 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1711 @*/ 1712 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) { 1713 int ierr,(*f)(Mat,int,int,const int[]); 1714 1715 PetscFunctionBegin; 1716 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1717 if (f) { 1718 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1719 } 1720 PetscFunctionReturn(0); 1721 } 1722 1723 #undef __FUNCT__ 1724 #define __FUNCT__ "MatCreateSeqSBAIJ" 1725 /*@C 1726 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1727 compressed row) format. For good matrix assembly performance the 1728 user should preallocate the matrix storage by setting the parameter nz 1729 (or the array nnz). By setting these parameters accurately, performance 1730 during matrix assembly can be increased by more than a factor of 50. 1731 1732 Collective on MPI_Comm 1733 1734 Input Parameters: 1735 + comm - MPI communicator, set to PETSC_COMM_SELF 1736 . bs - size of block 1737 . m - number of rows, or number of columns 1738 . nz - number of block nonzeros per block row (same for all rows) 1739 - nnz - array containing the number of block nonzeros in the upper triangular plus 1740 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1741 1742 Output Parameter: 1743 . A - the symmetric matrix 1744 1745 Options Database Keys: 1746 . -mat_no_unroll - uses code that does not unroll the loops in the 1747 block calculations (much slower) 1748 . -mat_block_size - size of the blocks to use 1749 1750 Level: intermediate 1751 1752 Notes: 1753 1754 Specify the preallocated storage with either nz or nnz (not both). 1755 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1756 allocation. For additional details, see the users manual chapter on 1757 matrices. 1758 1759 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1760 @*/ 1761 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 1762 { 1763 int ierr; 1764 1765 PetscFunctionBegin; 1766 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1767 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1768 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1769 PetscFunctionReturn(0); 1770 } 1771 1772 #undef __FUNCT__ 1773 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1774 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1775 { 1776 Mat C; 1777 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1778 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1779 1780 PetscFunctionBegin; 1781 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1782 1783 *B = 0; 1784 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1785 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1786 c = (Mat_SeqSBAIJ*)C->data; 1787 1788 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1789 C->preallocated = PETSC_TRUE; 1790 C->factor = A->factor; 1791 c->row = 0; 1792 c->icol = 0; 1793 c->saved_values = 0; 1794 c->keepzeroedrows = a->keepzeroedrows; 1795 C->assembled = PETSC_TRUE; 1796 1797 c->bs = a->bs; 1798 c->bs2 = a->bs2; 1799 c->mbs = a->mbs; 1800 c->nbs = a->nbs; 1801 1802 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1803 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1804 for (i=0; i<mbs; i++) { 1805 c->imax[i] = a->imax[i]; 1806 c->ilen[i] = a->ilen[i]; 1807 } 1808 1809 /* allocate the matrix space */ 1810 c->singlemalloc = PETSC_TRUE; 1811 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1812 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1813 c->j = (int*)(c->a + nz*bs2); 1814 c->i = c->j + nz; 1815 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1816 if (mbs > 0) { 1817 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1818 if (cpvalues == MAT_COPY_VALUES) { 1819 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1820 } else { 1821 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1822 } 1823 } 1824 1825 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1826 c->sorted = a->sorted; 1827 c->roworiented = a->roworiented; 1828 c->nonew = a->nonew; 1829 1830 if (a->diag) { 1831 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1832 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1833 for (i=0; i<mbs; i++) { 1834 c->diag[i] = a->diag[i]; 1835 } 1836 } else c->diag = 0; 1837 c->s_nz = a->s_nz; 1838 c->s_maxnz = a->s_maxnz; 1839 c->solve_work = 0; 1840 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1841 c->mult_work = 0; 1842 *B = C; 1843 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1844 PetscFunctionReturn(0); 1845 } 1846 1847 #undef __FUNCT__ 1848 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1849 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1850 { 1851 Mat_SeqSBAIJ *a; 1852 Mat B; 1853 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1854 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1855 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1856 int *masked,nmask,tmp,bs2,ishift; 1857 PetscScalar *aa; 1858 MPI_Comm comm = ((PetscObject)viewer)->comm; 1859 1860 PetscFunctionBegin; 1861 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1862 bs2 = bs*bs; 1863 1864 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1865 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1866 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1867 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1868 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1869 M = header[1]; N = header[2]; nz = header[3]; 1870 1871 if (header[3] < 0) { 1872 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1873 } 1874 1875 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1876 1877 /* 1878 This code adds extra rows to make sure the number of rows is 1879 divisible by the blocksize 1880 */ 1881 mbs = M/bs; 1882 extra_rows = bs - M + bs*(mbs); 1883 if (extra_rows == bs) extra_rows = 0; 1884 else mbs++; 1885 if (extra_rows) { 1886 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1887 } 1888 1889 /* read in row lengths */ 1890 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1891 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1892 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1893 1894 /* read in column indices */ 1895 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1896 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1897 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1898 1899 /* loop over row lengths determining block row lengths */ 1900 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1901 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1902 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1903 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1904 masked = mask + mbs; 1905 rowcount = 0; nzcount = 0; 1906 for (i=0; i<mbs; i++) { 1907 nmask = 0; 1908 for (j=0; j<bs; j++) { 1909 kmax = rowlengths[rowcount]; 1910 for (k=0; k<kmax; k++) { 1911 tmp = jj[nzcount++]/bs; /* block col. index */ 1912 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1913 } 1914 rowcount++; 1915 } 1916 s_browlengths[i] += nmask; 1917 1918 /* zero out the mask elements we set */ 1919 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1920 } 1921 1922 /* create our matrix */ 1923 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1924 ierr = MatSetType(B,type);CHKERRQ(ierr); 1925 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1926 a = (Mat_SeqSBAIJ*)B->data; 1927 1928 /* set matrix "i" values */ 1929 a->i[0] = 0; 1930 for (i=1; i<= mbs; i++) { 1931 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1932 a->ilen[i-1] = s_browlengths[i-1]; 1933 } 1934 a->s_nz = a->i[mbs]; 1935 1936 /* read in nonzero values */ 1937 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1938 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1939 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1940 1941 /* set "a" and "j" values into matrix */ 1942 nzcount = 0; jcount = 0; 1943 for (i=0; i<mbs; i++) { 1944 nzcountb = nzcount; 1945 nmask = 0; 1946 for (j=0; j<bs; j++) { 1947 kmax = rowlengths[i*bs+j]; 1948 for (k=0; k<kmax; k++) { 1949 tmp = jj[nzcount++]/bs; /* block col. index */ 1950 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1951 } 1952 } 1953 /* sort the masked values */ 1954 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1955 1956 /* set "j" values into matrix */ 1957 maskcount = 1; 1958 for (j=0; j<nmask; j++) { 1959 a->j[jcount++] = masked[j]; 1960 mask[masked[j]] = maskcount++; 1961 } 1962 1963 /* set "a" values into matrix */ 1964 ishift = bs2*a->i[i]; 1965 for (j=0; j<bs; j++) { 1966 kmax = rowlengths[i*bs+j]; 1967 for (k=0; k<kmax; k++) { 1968 tmp = jj[nzcountb]/bs ; /* block col. index */ 1969 if (tmp >= i){ 1970 block = mask[tmp] - 1; 1971 point = jj[nzcountb] - bs*tmp; 1972 idx = ishift + bs2*block + j + bs*point; 1973 a->a[idx] = aa[nzcountb]; 1974 } 1975 nzcountb++; 1976 } 1977 } 1978 /* zero out the mask elements we set */ 1979 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1980 } 1981 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1982 1983 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1984 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1985 ierr = PetscFree(aa);CHKERRQ(ierr); 1986 ierr = PetscFree(jj);CHKERRQ(ierr); 1987 ierr = PetscFree(mask);CHKERRQ(ierr); 1988 1989 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1990 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1991 ierr = MatView_Private(B);CHKERRQ(ierr); 1992 *A = B; 1993 PetscFunctionReturn(0); 1994 } 1995 1996 #undef __FUNCT__ 1997 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1998 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1999 { 2000 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 2001 MatScalar *aa=a->a,*v,*v1; 2002 PetscScalar *x,*b,*t,sum,d; 2003 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 2004 int nz,nz1,*vj,*vj1,i; 2005 2006 PetscFunctionBegin; 2007 its = its*lits; 2008 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 2009 2010 if (bs > 1) 2011 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2012 2013 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2014 if (xx != bb) { 2015 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2016 } else { 2017 b = x; 2018 } 2019 2020 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 2021 2022 if (flag & SOR_ZERO_INITIAL_GUESS) { 2023 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2024 for (i=0; i<m; i++) 2025 t[i] = b[i]; 2026 2027 for (i=0; i<m; i++){ 2028 d = *(aa + ai[i]); /* diag[i] */ 2029 v = aa + ai[i] + 1; 2030 vj = aj + ai[i] + 1; 2031 nz = ai[i+1] - ai[i] - 1; 2032 x[i] = omega*t[i]/d; 2033 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2034 PetscLogFlops(2*nz-1); 2035 } 2036 } 2037 2038 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2039 for (i=0; i<m; i++) 2040 t[i] = b[i]; 2041 2042 for (i=0; i<m-1; i++){ /* update rhs */ 2043 v = aa + ai[i] + 1; 2044 vj = aj + ai[i] + 1; 2045 nz = ai[i+1] - ai[i] - 1; 2046 while (nz--) t[*vj++] -= x[i]*(*v++); 2047 PetscLogFlops(2*nz-1); 2048 } 2049 for (i=m-1; i>=0; i--){ 2050 d = *(aa + ai[i]); 2051 v = aa + ai[i] + 1; 2052 vj = aj + ai[i] + 1; 2053 nz = ai[i+1] - ai[i] - 1; 2054 sum = t[i]; 2055 while (nz--) sum -= x[*vj++]*(*v++); 2056 PetscLogFlops(2*nz-1); 2057 x[i] = (1-omega)*x[i] + omega*sum/d; 2058 } 2059 } 2060 its--; 2061 } 2062 2063 while (its--) { 2064 /* 2065 forward sweep: 2066 for i=0,...,m-1: 2067 sum[i] = (b[i] - U(i,:)x )/d[i]; 2068 x[i] = (1-omega)x[i] + omega*sum[i]; 2069 b = b - x[i]*U^T(i,:); 2070 2071 */ 2072 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2073 for (i=0; i<m; i++) 2074 t[i] = b[i]; 2075 2076 for (i=0; i<m; i++){ 2077 d = *(aa + ai[i]); /* diag[i] */ 2078 v = aa + ai[i] + 1; v1=v; 2079 vj = aj + ai[i] + 1; vj1=vj; 2080 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2081 sum = t[i]; 2082 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2083 x[i] = (1-omega)*x[i] + omega*sum/d; 2084 while (nz--) t[*vj++] -= x[i]*(*v++); 2085 PetscLogFlops(4*nz-2); 2086 } 2087 } 2088 2089 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2090 /* 2091 backward sweep: 2092 b = b - x[i]*U^T(i,:), i=0,...,n-2 2093 for i=m-1,...,0: 2094 sum[i] = (b[i] - U(i,:)x )/d[i]; 2095 x[i] = (1-omega)x[i] + omega*sum[i]; 2096 */ 2097 for (i=0; i<m; i++) 2098 t[i] = b[i]; 2099 2100 for (i=0; i<m-1; i++){ /* update rhs */ 2101 v = aa + ai[i] + 1; 2102 vj = aj + ai[i] + 1; 2103 nz = ai[i+1] - ai[i] - 1; 2104 while (nz--) t[*vj++] -= x[i]*(*v++); 2105 PetscLogFlops(2*nz-1); 2106 } 2107 for (i=m-1; i>=0; i--){ 2108 d = *(aa + ai[i]); 2109 v = aa + ai[i] + 1; 2110 vj = aj + ai[i] + 1; 2111 nz = ai[i+1] - ai[i] - 1; 2112 sum = t[i]; 2113 while (nz--) sum -= x[*vj++]*(*v++); 2114 PetscLogFlops(2*nz-1); 2115 x[i] = (1-omega)*x[i] + omega*sum/d; 2116 } 2117 } 2118 } 2119 2120 ierr = PetscFree(t); CHKERRQ(ierr); 2121 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2122 if (bb != xx) { 2123 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2124 } 2125 2126 PetscFunctionReturn(0); 2127 } 2128 2129 2130 2131 2132 2133 2134