1 /*$Id: baij.c,v 1.240 2001/07/16 20:10:28 buschelm Exp buschelm $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 /* UGLY, ugly, ugly 13 When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 14 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 15 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 16 converts the entries into single precision and then calls ..._MatScalar() to put them 17 into the single precision data structures. 18 */ 19 #if defined(PETSC_USE_MAT_SINGLE) 20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 21 #else 22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 23 #endif 24 25 #define CHUNKSIZE 10 26 27 /* 28 Checks for missing diagonals 29 */ 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 32 int MatMissingDiagonal_SeqBAIJ(Mat A) 33 { 34 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 35 int *diag,*jj = a->j,i,ierr; 36 37 PetscFunctionBegin; 38 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 39 diag = a->diag; 40 for (i=0; i<a->mbs; i++) { 41 if (jj[diag[i]] != i) { 42 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 43 } 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 50 int MatMarkDiagonal_SeqBAIJ(Mat A) 51 { 52 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 53 int i,j,*diag,m = a->mbs,ierr; 54 55 PetscFunctionBegin; 56 if (a->diag) PetscFunctionReturn(0); 57 58 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 59 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 60 for (i=0; i<m; i++) { 61 diag[i] = a->i[i+1]; 62 for (j=a->i[i]; j<a->i[i+1]; j++) { 63 if (a->j[j] == i) { 64 diag[i] = j; 65 break; 66 } 67 } 68 } 69 a->diag = diag; 70 PetscFunctionReturn(0); 71 } 72 73 74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 79 { 80 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 81 int ierr,n = a->mbs,i; 82 83 PetscFunctionBegin; 84 *nn = n; 85 if (!ia) PetscFunctionReturn(0); 86 if (symmetric) { 87 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 88 } else if (oshift == 1) { 89 /* temporarily add 1 to i and j indices */ 90 int nz = a->i[n]; 91 for (i=0; i<nz; i++) a->j[i]++; 92 for (i=0; i<n+1; i++) a->i[i]++; 93 *ia = a->i; *ja = a->j; 94 } else { 95 *ia = a->i; *ja = a->j; 96 } 97 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 104 { 105 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 106 int i,n = a->mbs,ierr; 107 108 PetscFunctionBegin; 109 if (!ia) PetscFunctionReturn(0); 110 if (symmetric) { 111 ierr = PetscFree(*ia);CHKERRQ(ierr); 112 ierr = PetscFree(*ja);CHKERRQ(ierr); 113 } else if (oshift == 1) { 114 int nz = a->i[n]-1; 115 for (i=0; i<nz; i++) a->j[i]--; 116 for (i=0; i<n+1; i++) a->i[i]--; 117 } 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 124 { 125 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 126 127 PetscFunctionBegin; 128 *bs = baij->bs; 129 PetscFunctionReturn(0); 130 } 131 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatDestroy_SeqBAIJ" 135 int MatDestroy_SeqBAIJ(Mat A) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 int ierr; 139 140 PetscFunctionBegin; 141 #if defined(PETSC_USE_LOG) 142 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 #if defined(PETSC_USE_MAT_SINGLE) 163 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 164 #endif 165 ierr = PetscFree(a);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatSetOption_SeqBAIJ" 171 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 172 { 173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 174 175 PetscFunctionBegin; 176 switch (op) { 177 case MAT_ROW_ORIENTED: 178 a->roworiented = PETSC_TRUE; 179 break; 180 case MAT_COLUMN_ORIENTED: 181 a->roworiented = PETSC_FALSE; 182 break; 183 case MAT_COLUMNS_SORTED: 184 a->sorted = PETSC_TRUE; 185 break; 186 case MAT_COLUMNS_UNSORTED: 187 a->sorted = PETSC_FALSE; 188 break; 189 case MAT_KEEP_ZEROED_ROWS: 190 a->keepzeroedrows = PETSC_TRUE; 191 break; 192 case MAT_NO_NEW_NONZERO_LOCATIONS: 193 a->nonew = 1; 194 break; 195 case MAT_NEW_NONZERO_LOCATION_ERR: 196 a->nonew = -1; 197 break; 198 case MAT_NEW_NONZERO_ALLOCATION_ERR: 199 a->nonew = -2; 200 break; 201 case MAT_YES_NEW_NONZERO_LOCATIONS: 202 a->nonew = 0; 203 break; 204 case MAT_ROWS_SORTED: 205 case MAT_ROWS_UNSORTED: 206 case MAT_YES_NEW_DIAGONALS: 207 case MAT_IGNORE_OFF_PROC_ENTRIES: 208 case MAT_USE_HASH_TABLE: 209 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 210 break; 211 case MAT_NO_NEW_DIAGONALS: 212 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 213 break; 214 case MAT_USE_SINGLE_PRECISION_SOLVES: 215 if (a->bs==4) { 216 PetscTruth sse_enabled=PETSC_FALSE; 217 int ierr; 218 ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr); 219 #if defined(PETSC_HAVE_SSE) 220 if (sse_enabled) { 221 a->single_precision_solves = PETSC_TRUE; 222 A->ops->solve = MatSolve_SeqBAIJ_Update; 223 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 224 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 225 break; 226 } 227 #else 228 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 229 #endif 230 } 231 break; 232 default: 233 SETERRQ(PETSC_ERR_SUP,"unknown option"); 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ" 240 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 241 { 242 PetscFunctionBegin; 243 if (m) *m = 0; 244 if (n) *n = A->m; 245 PetscFunctionReturn(0); 246 } 247 248 #undef __FUNCT__ 249 #define __FUNCT__ "MatGetRow_SeqBAIJ" 250 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 251 { 252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 253 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 254 MatScalar *aa,*aa_i; 255 Scalar *v_i; 256 257 PetscFunctionBegin; 258 bs = a->bs; 259 ai = a->i; 260 aj = a->j; 261 aa = a->a; 262 bs2 = a->bs2; 263 264 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 265 266 bn = row/bs; /* Block number */ 267 bp = row % bs; /* Block Position */ 268 M = ai[bn+1] - ai[bn]; 269 *nz = bs*M; 270 271 if (v) { 272 *v = 0; 273 if (*nz) { 274 ierr = PetscMalloc((*nz)*sizeof(Scalar),v);CHKERRQ(ierr); 275 for (i=0; i<M; i++) { /* for each block in the block row */ 276 v_i = *v + i*bs; 277 aa_i = aa + bs2*(ai[bn] + i); 278 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 279 } 280 } 281 } 282 283 if (idx) { 284 *idx = 0; 285 if (*nz) { 286 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 287 for (i=0; i<M; i++) { /* for each block in the block row */ 288 idx_i = *idx + i*bs; 289 itmp = bs*aj[ai[bn] + i]; 290 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 291 } 292 } 293 } 294 PetscFunctionReturn(0); 295 } 296 297 #undef __FUNCT__ 298 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 299 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 300 { 301 int ierr; 302 303 PetscFunctionBegin; 304 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 305 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatTranspose_SeqBAIJ" 311 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 312 { 313 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 314 Mat C; 315 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 316 int *rows,*cols,bs2=a->bs2; 317 Scalar *array; 318 319 PetscFunctionBegin; 320 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 321 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 322 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 323 324 #if defined(PETSC_USE_MAT_SINGLE) 325 ierr = PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);CHKERRQ(ierr); 326 for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i]; 327 #else 328 array = a->a; 329 #endif 330 331 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 332 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 333 ierr = PetscFree(col);CHKERRQ(ierr); 334 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 335 cols = rows + bs; 336 for (i=0; i<mbs; i++) { 337 cols[0] = i*bs; 338 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 339 len = ai[i+1] - ai[i]; 340 for (j=0; j<len; j++) { 341 rows[0] = (*aj++)*bs; 342 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 343 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 344 array += bs2; 345 } 346 } 347 ierr = PetscFree(rows);CHKERRQ(ierr); 348 #if defined(PETSC_USE_MAT_SINGLE) 349 ierr = PetscFree(array); 350 #endif 351 352 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 353 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 354 355 if (B) { 356 *B = C; 357 } else { 358 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 359 } 360 PetscFunctionReturn(0); 361 } 362 363 #undef __FUNCT__ 364 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 365 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 366 { 367 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 368 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 369 Scalar *aa; 370 FILE *file; 371 372 PetscFunctionBegin; 373 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 374 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 375 col_lens[0] = MAT_COOKIE; 376 377 col_lens[1] = A->m; 378 col_lens[2] = A->n; 379 col_lens[3] = a->nz*bs2; 380 381 /* store lengths of each row and write (including header) to file */ 382 count = 0; 383 for (i=0; i<a->mbs; i++) { 384 for (j=0; j<bs; j++) { 385 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 386 } 387 } 388 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 389 ierr = PetscFree(col_lens);CHKERRQ(ierr); 390 391 /* store column indices (zero start index) */ 392 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 393 count = 0; 394 for (i=0; i<a->mbs; i++) { 395 for (j=0; j<bs; j++) { 396 for (k=a->i[i]; k<a->i[i+1]; k++) { 397 for (l=0; l<bs; l++) { 398 jj[count++] = bs*a->j[k] + l; 399 } 400 } 401 } 402 } 403 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 404 ierr = PetscFree(jj);CHKERRQ(ierr); 405 406 /* store nonzero values */ 407 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr); 408 count = 0; 409 for (i=0; i<a->mbs; i++) { 410 for (j=0; j<bs; j++) { 411 for (k=a->i[i]; k<a->i[i+1]; k++) { 412 for (l=0; l<bs; l++) { 413 aa[count++] = a->a[bs2*k + l*bs + j]; 414 } 415 } 416 } 417 } 418 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 419 ierr = PetscFree(aa);CHKERRQ(ierr); 420 421 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 422 if (file) { 423 fprintf(file,"-matload_block_size %d\n",a->bs); 424 } 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 430 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 431 { 432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 433 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 434 PetscViewerFormat format; 435 436 PetscFunctionBegin; 437 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 438 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 439 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 440 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 441 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 442 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 443 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 444 for (i=0; i<a->mbs; i++) { 445 for (j=0; j<bs; j++) { 446 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 447 for (k=a->i[i]; k<a->i[i+1]; k++) { 448 for (l=0; l<bs; l++) { 449 #if defined(PETSC_USE_COMPLEX) 450 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 451 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l, 452 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 453 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 454 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l, 455 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 456 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 457 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 458 } 459 #else 460 if (a->a[bs2*k + l*bs + j] != 0.0) { 461 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 462 } 463 #endif 464 } 465 } 466 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 467 } 468 } 469 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 470 } else { 471 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 472 for (i=0; i<a->mbs; i++) { 473 for (j=0; j<bs; j++) { 474 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 475 for (k=a->i[i]; k<a->i[i+1]; k++) { 476 for (l=0; l<bs; l++) { 477 #if defined(PETSC_USE_COMPLEX) 478 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 479 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 480 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 481 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 482 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 483 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 484 } else { 485 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 486 } 487 #else 488 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 489 #endif 490 } 491 } 492 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 493 } 494 } 495 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 496 } 497 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 501 #undef __FUNCT__ 502 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 503 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 504 { 505 Mat A = (Mat) Aa; 506 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 507 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 508 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 509 MatScalar *aa; 510 PetscViewer viewer; 511 512 PetscFunctionBegin; 513 514 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 515 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 516 517 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 518 519 /* loop over matrix elements drawing boxes */ 520 color = PETSC_DRAW_BLUE; 521 for (i=0,row=0; i<mbs; i++,row+=bs) { 522 for (j=a->i[i]; j<a->i[i+1]; j++) { 523 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 524 x_l = a->j[j]*bs; x_r = x_l + 1.0; 525 aa = a->a + j*bs2; 526 for (k=0; k<bs; k++) { 527 for (l=0; l<bs; l++) { 528 if (PetscRealPart(*aa++) >= 0.) continue; 529 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 530 } 531 } 532 } 533 } 534 color = PETSC_DRAW_CYAN; 535 for (i=0,row=0; i<mbs; i++,row+=bs) { 536 for (j=a->i[i]; j<a->i[i+1]; j++) { 537 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 538 x_l = a->j[j]*bs; x_r = x_l + 1.0; 539 aa = a->a + j*bs2; 540 for (k=0; k<bs; k++) { 541 for (l=0; l<bs; l++) { 542 if (PetscRealPart(*aa++) != 0.) continue; 543 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 544 } 545 } 546 } 547 } 548 549 color = PETSC_DRAW_RED; 550 for (i=0,row=0; i<mbs; i++,row+=bs) { 551 for (j=a->i[i]; j<a->i[i+1]; j++) { 552 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 553 x_l = a->j[j]*bs; x_r = x_l + 1.0; 554 aa = a->a + j*bs2; 555 for (k=0; k<bs; k++) { 556 for (l=0; l<bs; l++) { 557 if (PetscRealPart(*aa++) <= 0.) continue; 558 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 PetscFunctionReturn(0); 564 } 565 566 #undef __FUNCT__ 567 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 568 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 569 { 570 int ierr; 571 PetscReal xl,yl,xr,yr,w,h; 572 PetscDraw draw; 573 PetscTruth isnull; 574 575 PetscFunctionBegin; 576 577 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 578 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 579 580 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 581 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 582 xr += w; yr += h; xl = -w; yl = -h; 583 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 584 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 585 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatView_SeqBAIJ" 591 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 592 { 593 int ierr; 594 PetscTruth issocket,isascii,isbinary,isdraw; 595 596 PetscFunctionBegin; 597 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 598 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 599 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 600 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 601 if (issocket) { 602 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 603 } else if (isascii){ 604 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 605 } else if (isbinary) { 606 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 607 } else if (isdraw) { 608 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 609 } else { 610 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 611 } 612 PetscFunctionReturn(0); 613 } 614 615 616 #undef __FUNCT__ 617 #define __FUNCT__ "MatGetValues_SeqBAIJ" 618 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 619 { 620 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 621 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 622 int *ai = a->i,*ailen = a->ilen; 623 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 624 MatScalar *ap,*aa = a->a,zero = 0.0; 625 626 PetscFunctionBegin; 627 for (k=0; k<m; k++) { /* loop over rows */ 628 row = im[k]; brow = row/bs; 629 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 630 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 631 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 632 nrow = ailen[brow]; 633 for (l=0; l<n; l++) { /* loop over columns */ 634 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 635 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 636 col = in[l] ; 637 bcol = col/bs; 638 cidx = col%bs; 639 ridx = row%bs; 640 high = nrow; 641 low = 0; /* assume unsorted */ 642 while (high-low > 5) { 643 t = (low+high)/2; 644 if (rp[t] > bcol) high = t; 645 else low = t; 646 } 647 for (i=low; i<high; i++) { 648 if (rp[i] > bcol) break; 649 if (rp[i] == bcol) { 650 *v++ = ap[bs2*i+bs*cidx+ridx]; 651 goto finished; 652 } 653 } 654 *v++ = zero; 655 finished:; 656 } 657 } 658 PetscFunctionReturn(0); 659 } 660 661 #if defined(PETSC_USE_MAT_SINGLE) 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 664 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 665 { 666 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 667 int ierr,i,N = m*n*b->bs2; 668 MatScalar *vsingle; 669 670 PetscFunctionBegin; 671 if (N > b->setvalueslen) { 672 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 673 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 674 b->setvalueslen = N; 675 } 676 vsingle = b->setvaluescopy; 677 for (i=0; i<N; i++) { 678 vsingle[i] = v[i]; 679 } 680 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 681 PetscFunctionReturn(0); 682 } 683 #endif 684 685 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 688 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 689 { 690 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 691 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 692 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 693 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 694 PetscTruth roworiented=a->roworiented; 695 MatScalar *value = v,*ap,*aa = a->a,*bap; 696 697 PetscFunctionBegin; 698 if (roworiented) { 699 stepval = (n-1)*bs; 700 } else { 701 stepval = (m-1)*bs; 702 } 703 for (k=0; k<m; k++) { /* loop over added rows */ 704 row = im[k]; 705 if (row < 0) continue; 706 #if defined(PETSC_USE_BOPT_g) 707 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 708 #endif 709 rp = aj + ai[row]; 710 ap = aa + bs2*ai[row]; 711 rmax = imax[row]; 712 nrow = ailen[row]; 713 low = 0; 714 for (l=0; l<n; l++) { /* loop over added columns */ 715 if (in[l] < 0) continue; 716 #if defined(PETSC_USE_BOPT_g) 717 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 718 #endif 719 col = in[l]; 720 if (roworiented) { 721 value = v + k*(stepval+bs)*bs + l*bs; 722 } else { 723 value = v + l*(stepval+bs)*bs + k*bs; 724 } 725 if (!sorted) low = 0; high = nrow; 726 while (high-low > 7) { 727 t = (low+high)/2; 728 if (rp[t] > col) high = t; 729 else low = t; 730 } 731 for (i=low; i<high; i++) { 732 if (rp[i] > col) break; 733 if (rp[i] == col) { 734 bap = ap + bs2*i; 735 if (roworiented) { 736 if (is == ADD_VALUES) { 737 for (ii=0; ii<bs; ii++,value+=stepval) { 738 for (jj=ii; jj<bs2; jj+=bs) { 739 bap[jj] += *value++; 740 } 741 } 742 } else { 743 for (ii=0; ii<bs; ii++,value+=stepval) { 744 for (jj=ii; jj<bs2; jj+=bs) { 745 bap[jj] = *value++; 746 } 747 } 748 } 749 } else { 750 if (is == ADD_VALUES) { 751 for (ii=0; ii<bs; ii++,value+=stepval) { 752 for (jj=0; jj<bs; jj++) { 753 *bap++ += *value++; 754 } 755 } 756 } else { 757 for (ii=0; ii<bs; ii++,value+=stepval) { 758 for (jj=0; jj<bs; jj++) { 759 *bap++ = *value++; 760 } 761 } 762 } 763 } 764 goto noinsert2; 765 } 766 } 767 if (nonew == 1) goto noinsert2; 768 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 769 if (nrow >= rmax) { 770 /* there is no extra room in row, therefore enlarge */ 771 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 772 MatScalar *new_a; 773 774 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 775 776 /* malloc new storage space */ 777 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 778 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 779 new_j = (int*)(new_a + bs2*new_nz); 780 new_i = new_j + new_nz; 781 782 /* copy over old data into new slots */ 783 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 784 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 785 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 786 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 787 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 788 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 789 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 790 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 791 /* free up old matrix storage */ 792 ierr = PetscFree(a->a);CHKERRQ(ierr); 793 if (!a->singlemalloc) { 794 ierr = PetscFree(a->i);CHKERRQ(ierr); 795 ierr = PetscFree(a->j);CHKERRQ(ierr); 796 } 797 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 798 a->singlemalloc = PETSC_TRUE; 799 800 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 801 rmax = imax[row] = imax[row] + CHUNKSIZE; 802 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 803 a->maxnz += bs2*CHUNKSIZE; 804 a->reallocs++; 805 a->nz++; 806 } 807 N = nrow++ - 1; 808 /* shift up all the later entries in this row */ 809 for (ii=N; ii>=i; ii--) { 810 rp[ii+1] = rp[ii]; 811 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 812 } 813 if (N >= i) { 814 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 815 } 816 rp[i] = col; 817 bap = ap + bs2*i; 818 if (roworiented) { 819 for (ii=0; ii<bs; ii++,value+=stepval) { 820 for (jj=ii; jj<bs2; jj+=bs) { 821 bap[jj] = *value++; 822 } 823 } 824 } else { 825 for (ii=0; ii<bs; ii++,value+=stepval) { 826 for (jj=0; jj<bs; jj++) { 827 *bap++ = *value++; 828 } 829 } 830 } 831 noinsert2:; 832 low = i; 833 } 834 ailen[row] = nrow; 835 } 836 PetscFunctionReturn(0); 837 } 838 839 840 #undef __FUNCT__ 841 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 842 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 843 { 844 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 845 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 846 int m = A->m,*ip,N,*ailen = a->ilen; 847 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 848 MatScalar *aa = a->a,*ap; 849 850 PetscFunctionBegin; 851 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 852 853 if (m) rmax = ailen[0]; 854 for (i=1; i<mbs; i++) { 855 /* move each row back by the amount of empty slots (fshift) before it*/ 856 fshift += imax[i-1] - ailen[i-1]; 857 rmax = PetscMax(rmax,ailen[i]); 858 if (fshift) { 859 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 860 N = ailen[i]; 861 for (j=0; j<N; j++) { 862 ip[j-fshift] = ip[j]; 863 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 864 } 865 } 866 ai[i] = ai[i-1] + ailen[i-1]; 867 } 868 if (mbs) { 869 fshift += imax[mbs-1] - ailen[mbs-1]; 870 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 871 } 872 /* reset ilen and imax for each row */ 873 for (i=0; i<mbs; i++) { 874 ailen[i] = imax[i] = ai[i+1] - ai[i]; 875 } 876 a->nz = ai[mbs]; 877 878 /* diagonals may have moved, so kill the diagonal pointers */ 879 if (fshift && a->diag) { 880 ierr = PetscFree(a->diag);CHKERRQ(ierr); 881 PetscLogObjectMemory(A,-(m+1)*sizeof(int)); 882 a->diag = 0; 883 } 884 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 885 m,A->n,a->bs,fshift*bs2,a->nz*bs2); 886 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 887 a->reallocs); 888 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 889 a->reallocs = 0; 890 A->info.nz_unneeded = (PetscReal)fshift*bs2; 891 892 PetscFunctionReturn(0); 893 } 894 895 896 897 /* 898 This function returns an array of flags which indicate the locations of contiguous 899 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 900 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 901 Assume: sizes should be long enough to hold all the values. 902 */ 903 #undef __FUNCT__ 904 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 905 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 906 { 907 int i,j,k,row; 908 PetscTruth flg; 909 910 PetscFunctionBegin; 911 for (i=0,j=0; i<n; j++) { 912 row = idx[i]; 913 if (row%bs!=0) { /* Not the begining of a block */ 914 sizes[j] = 1; 915 i++; 916 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 917 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 918 i++; 919 } else { /* Begining of the block, so check if the complete block exists */ 920 flg = PETSC_TRUE; 921 for (k=1; k<bs; k++) { 922 if (row+k != idx[i+k]) { /* break in the block */ 923 flg = PETSC_FALSE; 924 break; 925 } 926 } 927 if (flg == PETSC_TRUE) { /* No break in the bs */ 928 sizes[j] = bs; 929 i+= bs; 930 } else { 931 sizes[j] = 1; 932 i++; 933 } 934 } 935 } 936 *bs_max = j; 937 PetscFunctionReturn(0); 938 } 939 940 #undef __FUNCT__ 941 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 942 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag) 943 { 944 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 945 int ierr,i,j,k,count,is_n,*is_idx,*rows; 946 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 947 Scalar zero = 0.0; 948 MatScalar *aa; 949 950 PetscFunctionBegin; 951 /* Make a copy of the IS and sort it */ 952 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 953 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 954 955 /* allocate memory for rows,sizes */ 956 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 957 sizes = rows + is_n; 958 959 /* copy IS values to rows, and sort them */ 960 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 961 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 962 if (baij->keepzeroedrows) { 963 for (i=0; i<is_n; i++) { sizes[i] = 1; } 964 bs_max = is_n; 965 } else { 966 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 967 } 968 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 969 970 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 971 row = rows[j]; 972 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 973 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 974 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 975 if (sizes[i] == bs && !baij->keepzeroedrows) { 976 if (diag) { 977 if (baij->ilen[row/bs] > 0) { 978 baij->ilen[row/bs] = 1; 979 baij->j[baij->i[row/bs]] = row/bs; 980 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 981 } 982 /* Now insert all the diagonal values for this bs */ 983 for (k=0; k<bs; k++) { 984 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 985 } 986 } else { /* (!diag) */ 987 baij->ilen[row/bs] = 0; 988 } /* end (!diag) */ 989 } else { /* (sizes[i] != bs) */ 990 #if defined (PETSC_USE_DEBUG) 991 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 992 #endif 993 for (k=0; k<count; k++) { 994 aa[0] = zero; 995 aa += bs; 996 } 997 if (diag) { 998 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 999 } 1000 } 1001 } 1002 1003 ierr = PetscFree(rows);CHKERRQ(ierr); 1004 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1010 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 1011 { 1012 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1013 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1014 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1015 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1016 int ridx,cidx,bs2=a->bs2,ierr; 1017 PetscTruth roworiented=a->roworiented; 1018 MatScalar *ap,value,*aa=a->a,*bap; 1019 1020 PetscFunctionBegin; 1021 for (k=0; k<m; k++) { /* loop over added rows */ 1022 row = im[k]; brow = row/bs; 1023 if (row < 0) continue; 1024 #if defined(PETSC_USE_BOPT_g) 1025 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1026 #endif 1027 rp = aj + ai[brow]; 1028 ap = aa + bs2*ai[brow]; 1029 rmax = imax[brow]; 1030 nrow = ailen[brow]; 1031 low = 0; 1032 for (l=0; l<n; l++) { /* loop over added columns */ 1033 if (in[l] < 0) continue; 1034 #if defined(PETSC_USE_BOPT_g) 1035 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1036 #endif 1037 col = in[l]; bcol = col/bs; 1038 ridx = row % bs; cidx = col % bs; 1039 if (roworiented) { 1040 value = v[l + k*n]; 1041 } else { 1042 value = v[k + l*m]; 1043 } 1044 if (!sorted) low = 0; high = nrow; 1045 while (high-low > 7) { 1046 t = (low+high)/2; 1047 if (rp[t] > bcol) high = t; 1048 else low = t; 1049 } 1050 for (i=low; i<high; i++) { 1051 if (rp[i] > bcol) break; 1052 if (rp[i] == bcol) { 1053 bap = ap + bs2*i + bs*cidx + ridx; 1054 if (is == ADD_VALUES) *bap += value; 1055 else *bap = value; 1056 goto noinsert1; 1057 } 1058 } 1059 if (nonew == 1) goto noinsert1; 1060 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1061 if (nrow >= rmax) { 1062 /* there is no extra room in row, therefore enlarge */ 1063 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1064 MatScalar *new_a; 1065 1066 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1067 1068 /* Malloc new storage space */ 1069 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1070 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1071 new_j = (int*)(new_a + bs2*new_nz); 1072 new_i = new_j + new_nz; 1073 1074 /* copy over old data into new slots */ 1075 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1076 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1077 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1078 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1079 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1080 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1081 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1082 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1083 /* free up old matrix storage */ 1084 ierr = PetscFree(a->a);CHKERRQ(ierr); 1085 if (!a->singlemalloc) { 1086 ierr = PetscFree(a->i);CHKERRQ(ierr); 1087 ierr = PetscFree(a->j);CHKERRQ(ierr); 1088 } 1089 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1090 a->singlemalloc = PETSC_TRUE; 1091 1092 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1093 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1094 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1095 a->maxnz += bs2*CHUNKSIZE; 1096 a->reallocs++; 1097 a->nz++; 1098 } 1099 N = nrow++ - 1; 1100 /* shift up all the later entries in this row */ 1101 for (ii=N; ii>=i; ii--) { 1102 rp[ii+1] = rp[ii]; 1103 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1104 } 1105 if (N>=i) { 1106 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1107 } 1108 rp[i] = bcol; 1109 ap[bs2*i + bs*cidx + ridx] = value; 1110 noinsert1:; 1111 low = i; 1112 } 1113 ailen[brow] = nrow; 1114 } 1115 PetscFunctionReturn(0); 1116 } 1117 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1121 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1122 { 1123 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1124 Mat outA; 1125 int ierr; 1126 PetscTruth row_identity,col_identity; 1127 1128 PetscFunctionBegin; 1129 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1130 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1131 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1132 if (!row_identity || !col_identity) { 1133 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1134 } 1135 1136 outA = inA; 1137 inA->factor = FACTOR_LU; 1138 1139 if (!a->diag) { 1140 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1141 } 1142 1143 a->row = row; 1144 a->col = col; 1145 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1146 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1147 1148 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1149 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1150 PetscLogObjectParent(inA,a->icol); 1151 1152 /* 1153 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1154 for ILU(0) factorization with natural ordering 1155 */ 1156 if (a->bs < 8) { 1157 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1158 } else { 1159 if (!a->solve_work) { 1160 ierr = PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr); 1161 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar)); 1162 } 1163 } 1164 1165 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1166 1167 PetscFunctionReturn(0); 1168 } 1169 #undef __FUNCT__ 1170 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1171 int MatPrintHelp_SeqBAIJ(Mat A) 1172 { 1173 static PetscTruth called = PETSC_FALSE; 1174 MPI_Comm comm = A->comm; 1175 int ierr; 1176 1177 PetscFunctionBegin; 1178 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1179 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1180 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1181 PetscFunctionReturn(0); 1182 } 1183 1184 EXTERN_C_BEGIN 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1187 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1188 { 1189 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1190 int i,nz,n; 1191 1192 PetscFunctionBegin; 1193 nz = baij->maxnz; 1194 n = mat->n; 1195 for (i=0; i<nz; i++) { 1196 baij->j[i] = indices[i]; 1197 } 1198 baij->nz = nz; 1199 for (i=0; i<n; i++) { 1200 baij->ilen[i] = baij->imax[i]; 1201 } 1202 1203 PetscFunctionReturn(0); 1204 } 1205 EXTERN_C_END 1206 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1209 /*@ 1210 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1211 in the matrix. 1212 1213 Input Parameters: 1214 + mat - the SeqBAIJ matrix 1215 - indices - the column indices 1216 1217 Level: advanced 1218 1219 Notes: 1220 This can be called if you have precomputed the nonzero structure of the 1221 matrix and want to provide it to the matrix object to improve the performance 1222 of the MatSetValues() operation. 1223 1224 You MUST have set the correct numbers of nonzeros per row in the call to 1225 MatCreateSeqBAIJ(). 1226 1227 MUST be called before any calls to MatSetValues(); 1228 1229 @*/ 1230 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1231 { 1232 int ierr,(*f)(Mat,int *); 1233 1234 PetscFunctionBegin; 1235 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1236 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr); 1237 if (f) { 1238 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1239 } else { 1240 SETERRQ(1,"Wrong type of matrix to set column indices"); 1241 } 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1247 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1248 { 1249 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1250 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1251 PetscReal atmp; 1252 Scalar *x,zero = 0.0; 1253 MatScalar *aa; 1254 int ncols,brow,krow,kcol; 1255 1256 PetscFunctionBegin; 1257 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1258 bs = a->bs; 1259 aa = a->a; 1260 ai = a->i; 1261 aj = a->j; 1262 mbs = a->mbs; 1263 1264 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1265 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1266 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1267 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1268 for (i=0; i<mbs; i++) { 1269 ncols = ai[1] - ai[0]; ai++; 1270 brow = bs*i; 1271 for (j=0; j<ncols; j++){ 1272 /* bcol = bs*(*aj); */ 1273 for (kcol=0; kcol<bs; kcol++){ 1274 for (krow=0; krow<bs; krow++){ 1275 atmp = PetscAbsScalar(*aa); aa++; 1276 row = brow + krow; /* row index */ 1277 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1278 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1279 } 1280 } 1281 aj++; 1282 } 1283 } 1284 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1290 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1291 { 1292 int ierr; 1293 1294 PetscFunctionBegin; 1295 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1296 PetscFunctionReturn(0); 1297 } 1298 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1301 int MatGetArray_SeqBAIJ(Mat A,Scalar **array) 1302 { 1303 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1304 PetscFunctionBegin; 1305 *array = a->a; 1306 PetscFunctionReturn(0); 1307 } 1308 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1311 int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array) 1312 { 1313 PetscFunctionBegin; 1314 PetscFunctionReturn(0); 1315 } 1316 1317 /* -------------------------------------------------------------------*/ 1318 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1319 MatGetRow_SeqBAIJ, 1320 MatRestoreRow_SeqBAIJ, 1321 MatMult_SeqBAIJ_N, 1322 MatMultAdd_SeqBAIJ_N, 1323 MatMultTranspose_SeqBAIJ, 1324 MatMultTransposeAdd_SeqBAIJ, 1325 MatSolve_SeqBAIJ_N, 1326 0, 1327 0, 1328 0, 1329 MatLUFactor_SeqBAIJ, 1330 0, 1331 0, 1332 MatTranspose_SeqBAIJ, 1333 MatGetInfo_SeqBAIJ, 1334 MatEqual_SeqBAIJ, 1335 MatGetDiagonal_SeqBAIJ, 1336 MatDiagonalScale_SeqBAIJ, 1337 MatNorm_SeqBAIJ, 1338 0, 1339 MatAssemblyEnd_SeqBAIJ, 1340 0, 1341 MatSetOption_SeqBAIJ, 1342 MatZeroEntries_SeqBAIJ, 1343 MatZeroRows_SeqBAIJ, 1344 MatLUFactorSymbolic_SeqBAIJ, 1345 MatLUFactorNumeric_SeqBAIJ_N, 1346 0, 1347 0, 1348 MatSetUpPreallocation_SeqBAIJ, 1349 0, 1350 MatGetOwnershipRange_SeqBAIJ, 1351 MatILUFactorSymbolic_SeqBAIJ, 1352 0, 1353 MatGetArray_SeqBAIJ, 1354 MatRestoreArray_SeqBAIJ, 1355 MatDuplicate_SeqBAIJ, 1356 0, 1357 0, 1358 MatILUFactor_SeqBAIJ, 1359 0, 1360 0, 1361 MatGetSubMatrices_SeqBAIJ, 1362 MatIncreaseOverlap_SeqBAIJ, 1363 MatGetValues_SeqBAIJ, 1364 0, 1365 MatPrintHelp_SeqBAIJ, 1366 MatScale_SeqBAIJ, 1367 0, 1368 0, 1369 0, 1370 MatGetBlockSize_SeqBAIJ, 1371 MatGetRowIJ_SeqBAIJ, 1372 MatRestoreRowIJ_SeqBAIJ, 1373 0, 1374 0, 1375 0, 1376 0, 1377 0, 1378 0, 1379 MatSetValuesBlocked_SeqBAIJ, 1380 MatGetSubMatrix_SeqBAIJ, 1381 MatDestroy_SeqBAIJ, 1382 MatView_SeqBAIJ, 1383 MatGetMaps_Petsc, 1384 0, 1385 0, 1386 0, 1387 0, 1388 0, 1389 0, 1390 MatGetRowMax_SeqBAIJ, 1391 MatConvert_Basic}; 1392 1393 EXTERN_C_BEGIN 1394 #undef __FUNCT__ 1395 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1396 int MatStoreValues_SeqBAIJ(Mat mat) 1397 { 1398 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1399 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1400 int ierr; 1401 1402 PetscFunctionBegin; 1403 if (aij->nonew != 1) { 1404 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1405 } 1406 1407 /* allocate space for values if not already there */ 1408 if (!aij->saved_values) { 1409 ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr); 1410 } 1411 1412 /* copy values over */ 1413 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1414 PetscFunctionReturn(0); 1415 } 1416 EXTERN_C_END 1417 1418 EXTERN_C_BEGIN 1419 #undef __FUNCT__ 1420 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1421 int MatRetrieveValues_SeqBAIJ(Mat mat) 1422 { 1423 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1424 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1425 1426 PetscFunctionBegin; 1427 if (aij->nonew != 1) { 1428 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1429 } 1430 if (!aij->saved_values) { 1431 SETERRQ(1,"Must call MatStoreValues(A);first"); 1432 } 1433 1434 /* copy values over */ 1435 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1436 PetscFunctionReturn(0); 1437 } 1438 EXTERN_C_END 1439 1440 EXTERN_C_BEGIN 1441 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1442 EXTERN_C_END 1443 1444 EXTERN_C_BEGIN 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatCreate_SeqBAIJ" 1447 int MatCreate_SeqBAIJ(Mat B) 1448 { 1449 int ierr,size; 1450 Mat_SeqBAIJ *b; 1451 1452 PetscFunctionBegin; 1453 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1454 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1455 1456 B->m = B->M = PetscMax(B->m,B->M); 1457 B->n = B->N = PetscMax(B->n,B->N); 1458 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1459 B->data = (void*)b; 1460 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1461 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1462 B->factor = 0; 1463 B->lupivotthreshold = 1.0; 1464 B->mapping = 0; 1465 b->row = 0; 1466 b->col = 0; 1467 b->icol = 0; 1468 b->reallocs = 0; 1469 b->saved_values = 0; 1470 #if defined(PETSC_USE_MAT_SINGLE) 1471 b->setvalueslen = 0; 1472 b->setvaluescopy = PETSC_NULL; 1473 #endif 1474 b->single_precision_solves = PETSC_FALSE; 1475 1476 ierr = MapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1477 ierr = MapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1478 1479 b->sorted = PETSC_FALSE; 1480 b->roworiented = PETSC_TRUE; 1481 b->nonew = 0; 1482 b->diag = 0; 1483 b->solve_work = 0; 1484 b->mult_work = 0; 1485 b->spptr = 0; 1486 B->info.nz_unneeded = (PetscReal)b->maxnz; 1487 b->keepzeroedrows = PETSC_FALSE; 1488 1489 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1490 "MatStoreValues_SeqBAIJ", 1491 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1492 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1493 "MatRetrieveValues_SeqBAIJ", 1494 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1495 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1496 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1497 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1498 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1499 "MatConvert_SeqBAIJ_SeqAIJ", 1500 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1501 PetscFunctionReturn(0); 1502 } 1503 EXTERN_C_END 1504 1505 #undef __FUNCT__ 1506 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1507 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1508 { 1509 Mat C; 1510 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1511 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1512 1513 PetscFunctionBegin; 1514 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1515 1516 *B = 0; 1517 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1518 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1519 c = (Mat_SeqBAIJ*)C->data; 1520 1521 c->bs = a->bs; 1522 c->bs2 = a->bs2; 1523 c->mbs = a->mbs; 1524 c->nbs = a->nbs; 1525 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1526 1527 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1528 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1529 for (i=0; i<mbs; i++) { 1530 c->imax[i] = a->imax[i]; 1531 c->ilen[i] = a->ilen[i]; 1532 } 1533 1534 /* allocate the matrix space */ 1535 c->singlemalloc = PETSC_TRUE; 1536 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1537 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1538 c->j = (int*)(c->a + nz*bs2); 1539 c->i = c->j + nz; 1540 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1541 if (mbs > 0) { 1542 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1543 if (cpvalues == MAT_COPY_VALUES) { 1544 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1545 } else { 1546 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1547 } 1548 } 1549 1550 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1551 c->sorted = a->sorted; 1552 c->roworiented = a->roworiented; 1553 c->nonew = a->nonew; 1554 1555 if (a->diag) { 1556 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1557 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1558 for (i=0; i<mbs; i++) { 1559 c->diag[i] = a->diag[i]; 1560 } 1561 } else c->diag = 0; 1562 c->nz = a->nz; 1563 c->maxnz = a->maxnz; 1564 c->solve_work = 0; 1565 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1566 c->mult_work = 0; 1567 C->preallocated = PETSC_TRUE; 1568 C->assembled = PETSC_TRUE; 1569 *B = C; 1570 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1571 PetscFunctionReturn(0); 1572 } 1573 1574 EXTERN_C_BEGIN 1575 #undef __FUNCT__ 1576 #define __FUNCT__ "MatLoad_SeqBAIJ" 1577 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1578 { 1579 Mat_SeqBAIJ *a; 1580 Mat B; 1581 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1582 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1583 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1584 int *masked,nmask,tmp,bs2,ishift; 1585 Scalar *aa; 1586 MPI_Comm comm = ((PetscObject)viewer)->comm; 1587 1588 PetscFunctionBegin; 1589 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1590 bs2 = bs*bs; 1591 1592 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1593 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1594 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1595 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1596 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1597 M = header[1]; N = header[2]; nz = header[3]; 1598 1599 if (header[3] < 0) { 1600 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1601 } 1602 1603 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1604 1605 /* 1606 This code adds extra rows to make sure the number of rows is 1607 divisible by the blocksize 1608 */ 1609 mbs = M/bs; 1610 extra_rows = bs - M + bs*(mbs); 1611 if (extra_rows == bs) extra_rows = 0; 1612 else mbs++; 1613 if (extra_rows) { 1614 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1615 } 1616 1617 /* read in row lengths */ 1618 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1619 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1620 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1621 1622 /* read in column indices */ 1623 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1624 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1625 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1626 1627 /* loop over row lengths determining block row lengths */ 1628 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1629 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1630 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1631 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1632 masked = mask + mbs; 1633 rowcount = 0; nzcount = 0; 1634 for (i=0; i<mbs; i++) { 1635 nmask = 0; 1636 for (j=0; j<bs; j++) { 1637 kmax = rowlengths[rowcount]; 1638 for (k=0; k<kmax; k++) { 1639 tmp = jj[nzcount++]/bs; 1640 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1641 } 1642 rowcount++; 1643 } 1644 browlengths[i] += nmask; 1645 /* zero out the mask elements we set */ 1646 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1647 } 1648 1649 /* create our matrix */ 1650 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1651 B = *A; 1652 a = (Mat_SeqBAIJ*)B->data; 1653 1654 /* set matrix "i" values */ 1655 a->i[0] = 0; 1656 for (i=1; i<= mbs; i++) { 1657 a->i[i] = a->i[i-1] + browlengths[i-1]; 1658 a->ilen[i-1] = browlengths[i-1]; 1659 } 1660 a->nz = 0; 1661 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1662 1663 /* read in nonzero values */ 1664 ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr); 1665 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1666 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1667 1668 /* set "a" and "j" values into matrix */ 1669 nzcount = 0; jcount = 0; 1670 for (i=0; i<mbs; i++) { 1671 nzcountb = nzcount; 1672 nmask = 0; 1673 for (j=0; j<bs; j++) { 1674 kmax = rowlengths[i*bs+j]; 1675 for (k=0; k<kmax; k++) { 1676 tmp = jj[nzcount++]/bs; 1677 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1678 } 1679 } 1680 /* sort the masked values */ 1681 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1682 1683 /* set "j" values into matrix */ 1684 maskcount = 1; 1685 for (j=0; j<nmask; j++) { 1686 a->j[jcount++] = masked[j]; 1687 mask[masked[j]] = maskcount++; 1688 } 1689 /* set "a" values into matrix */ 1690 ishift = bs2*a->i[i]; 1691 for (j=0; j<bs; j++) { 1692 kmax = rowlengths[i*bs+j]; 1693 for (k=0; k<kmax; k++) { 1694 tmp = jj[nzcountb]/bs ; 1695 block = mask[tmp] - 1; 1696 point = jj[nzcountb] - bs*tmp; 1697 idx = ishift + bs2*block + j + bs*point; 1698 a->a[idx] = (MatScalar)aa[nzcountb++]; 1699 } 1700 } 1701 /* zero out the mask elements we set */ 1702 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1703 } 1704 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1705 1706 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1707 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1708 ierr = PetscFree(aa);CHKERRQ(ierr); 1709 ierr = PetscFree(jj);CHKERRQ(ierr); 1710 ierr = PetscFree(mask);CHKERRQ(ierr); 1711 1712 B->assembled = PETSC_TRUE; 1713 1714 ierr = MatView_Private(B);CHKERRQ(ierr); 1715 PetscFunctionReturn(0); 1716 } 1717 EXTERN_C_END 1718 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "MatCreateSeqBAIJ" 1721 /*@C 1722 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1723 compressed row) format. For good matrix assembly performance the 1724 user should preallocate the matrix storage by setting the parameter nz 1725 (or the array nnz). By setting these parameters accurately, performance 1726 during matrix assembly can be increased by more than a factor of 50. 1727 1728 Collective on MPI_Comm 1729 1730 Input Parameters: 1731 + comm - MPI communicator, set to PETSC_COMM_SELF 1732 . bs - size of block 1733 . m - number of rows 1734 . n - number of columns 1735 . nz - number of nonzero blocks per block row (same for all rows) 1736 - nnz - array containing the number of nonzero blocks in the various block rows 1737 (possibly different for each block row) or PETSC_NULL 1738 1739 Output Parameter: 1740 . A - the matrix 1741 1742 Options Database Keys: 1743 . -mat_no_unroll - uses code that does not unroll the loops in the 1744 block calculations (much slower) 1745 . -mat_block_size - size of the blocks to use 1746 1747 Level: intermediate 1748 1749 Notes: 1750 A nonzero block is any block that as 1 or more nonzeros in it 1751 1752 The block AIJ format is fully compatible with standard Fortran 77 1753 storage. That is, the stored row and column indices can begin at 1754 either one (as in Fortran) or zero. See the users' manual for details. 1755 1756 Specify the preallocated storage with either nz or nnz (not both). 1757 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1758 allocation. For additional details, see the users manual chapter on 1759 matrices. 1760 1761 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1762 @*/ 1763 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1764 { 1765 int ierr; 1766 1767 PetscFunctionBegin; 1768 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1769 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1770 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1771 PetscFunctionReturn(0); 1772 } 1773 1774 #undef __FUNCT__ 1775 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1776 /*@C 1777 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1778 per row in the matrix. For good matrix assembly performance the 1779 user should preallocate the matrix storage by setting the parameter nz 1780 (or the array nnz). By setting these parameters accurately, performance 1781 during matrix assembly can be increased by more than a factor of 50. 1782 1783 Collective on MPI_Comm 1784 1785 Input Parameters: 1786 + A - the matrix 1787 . bs - size of block 1788 . nz - number of block nonzeros per block row (same for all rows) 1789 - nnz - array containing the number of block nonzeros in the various block rows 1790 (possibly different for each block row) or PETSC_NULL 1791 1792 Options Database Keys: 1793 . -mat_no_unroll - uses code that does not unroll the loops in the 1794 block calculations (much slower) 1795 . -mat_block_size - size of the blocks to use 1796 1797 Level: intermediate 1798 1799 Notes: 1800 The block AIJ format is fully compatible with standard Fortran 77 1801 storage. That is, the stored row and column indices can begin at 1802 either one (as in Fortran) or zero. See the users' manual for details. 1803 1804 Specify the preallocated storage with either nz or nnz (not both). 1805 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1806 allocation. For additional details, see the users manual chapter on 1807 matrices. 1808 1809 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1810 @*/ 1811 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1812 { 1813 Mat_SeqBAIJ *b; 1814 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1815 PetscTruth flg; 1816 1817 PetscFunctionBegin; 1818 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1819 if (!flg) PetscFunctionReturn(0); 1820 1821 B->preallocated = PETSC_TRUE; 1822 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1823 if (nnz && newbs != bs) { 1824 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1825 } 1826 bs = newbs; 1827 1828 mbs = B->m/bs; 1829 nbs = B->n/bs; 1830 bs2 = bs*bs; 1831 1832 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1833 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1834 } 1835 1836 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1837 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1838 if (nnz) { 1839 for (i=0; i<mbs; i++) { 1840 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1841 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1842 } 1843 } 1844 1845 b = (Mat_SeqBAIJ*)B->data; 1846 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1847 B->ops->solve = MatSolve_SeqBAIJ_Update; 1848 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1849 if (!flg) { 1850 switch (bs) { 1851 case 1: 1852 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1853 B->ops->mult = MatMult_SeqBAIJ_1; 1854 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1855 break; 1856 case 2: 1857 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1858 B->ops->mult = MatMult_SeqBAIJ_2; 1859 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1860 break; 1861 case 3: 1862 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1863 B->ops->mult = MatMult_SeqBAIJ_3; 1864 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1865 break; 1866 case 4: 1867 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1868 B->ops->mult = MatMult_SeqBAIJ_4; 1869 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1870 break; 1871 case 5: 1872 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1873 B->ops->mult = MatMult_SeqBAIJ_5; 1874 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1875 break; 1876 case 6: 1877 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1878 B->ops->mult = MatMult_SeqBAIJ_6; 1879 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1880 break; 1881 case 7: 1882 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1883 B->ops->mult = MatMult_SeqBAIJ_7; 1884 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1885 break; 1886 default: 1887 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1888 B->ops->mult = MatMult_SeqBAIJ_N; 1889 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1890 break; 1891 } 1892 } 1893 b->bs = bs; 1894 b->mbs = mbs; 1895 b->nbs = nbs; 1896 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1897 if (!nnz) { 1898 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1899 else if (nz <= 0) nz = 1; 1900 for (i=0; i<mbs; i++) b->imax[i] = nz; 1901 nz = nz*mbs; 1902 } else { 1903 nz = 0; 1904 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1905 } 1906 1907 /* allocate the matrix space */ 1908 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1909 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1910 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1911 b->j = (int*)(b->a + nz*bs2); 1912 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1913 b->i = b->j + nz; 1914 b->singlemalloc = PETSC_TRUE; 1915 1916 b->i[0] = 0; 1917 for (i=1; i<mbs+1; i++) { 1918 b->i[i] = b->i[i-1] + b->imax[i-1]; 1919 } 1920 1921 /* b->ilen will count nonzeros in each block row so far. */ 1922 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1923 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1924 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1925 1926 b->bs = bs; 1927 b->bs2 = bs2; 1928 b->mbs = mbs; 1929 b->nz = 0; 1930 b->maxnz = nz*bs2; 1931 B->info.nz_unneeded = (PetscReal)b->maxnz; 1932 PetscFunctionReturn(0); 1933 } 1934 1935