1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 /* UGLY, ugly, ugly 13 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 14 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 15 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 16 converts the entries into single precision and then calls ..._MatScalar() to put them 17 into the single precision data structures. 18 */ 19 #if defined(PETSC_USE_MAT_SINGLE) 20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 21 #else 22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 23 #endif 24 25 #define CHUNKSIZE 10 26 27 /* 28 Checks for missing diagonals 29 */ 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 32 int MatMissingDiagonal_SeqBAIJ(Mat A) 33 { 34 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 35 int *diag,*jj = a->j,i,ierr; 36 37 PetscFunctionBegin; 38 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 39 diag = a->diag; 40 for (i=0; i<a->mbs; i++) { 41 if (jj[diag[i]] != i) { 42 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 43 } 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 50 int MatMarkDiagonal_SeqBAIJ(Mat A) 51 { 52 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 53 int i,j,*diag,m = a->mbs,ierr; 54 55 PetscFunctionBegin; 56 if (a->diag) PetscFunctionReturn(0); 57 58 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 59 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 60 for (i=0; i<m; i++) { 61 diag[i] = a->i[i+1]; 62 for (j=a->i[i]; j<a->i[i+1]; j++) { 63 if (a->j[j] == i) { 64 diag[i] = j; 65 break; 66 } 67 } 68 } 69 a->diag = diag; 70 PetscFunctionReturn(0); 71 } 72 73 74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 79 { 80 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 81 int ierr,n = a->mbs,i; 82 83 PetscFunctionBegin; 84 *nn = n; 85 if (!ia) PetscFunctionReturn(0); 86 if (symmetric) { 87 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 88 } else if (oshift == 1) { 89 /* temporarily add 1 to i and j indices */ 90 int nz = a->i[n]; 91 for (i=0; i<nz; i++) a->j[i]++; 92 for (i=0; i<n+1; i++) a->i[i]++; 93 *ia = a->i; *ja = a->j; 94 } else { 95 *ia = a->i; *ja = a->j; 96 } 97 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 104 { 105 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 106 int i,n = a->mbs,ierr; 107 108 PetscFunctionBegin; 109 if (!ia) PetscFunctionReturn(0); 110 if (symmetric) { 111 ierr = PetscFree(*ia);CHKERRQ(ierr); 112 ierr = PetscFree(*ja);CHKERRQ(ierr); 113 } else if (oshift == 1) { 114 int nz = a->i[n]-1; 115 for (i=0; i<nz; i++) a->j[i]--; 116 for (i=0; i<n+1; i++) a->i[i]--; 117 } 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 124 { 125 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 126 127 PetscFunctionBegin; 128 *bs = baij->bs; 129 PetscFunctionReturn(0); 130 } 131 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatDestroy_SeqBAIJ" 135 int MatDestroy_SeqBAIJ(Mat A) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 int ierr; 139 140 PetscFunctionBegin; 141 #if defined(PETSC_USE_LOG) 142 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 #if defined(PETSC_USE_MAT_SINGLE) 163 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 164 #endif 165 ierr = PetscFree(a);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatSetOption_SeqBAIJ" 171 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 172 { 173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 174 175 PetscFunctionBegin; 176 switch (op) { 177 case MAT_ROW_ORIENTED: 178 a->roworiented = PETSC_TRUE; 179 break; 180 case MAT_COLUMN_ORIENTED: 181 a->roworiented = PETSC_FALSE; 182 break; 183 case MAT_COLUMNS_SORTED: 184 a->sorted = PETSC_TRUE; 185 break; 186 case MAT_COLUMNS_UNSORTED: 187 a->sorted = PETSC_FALSE; 188 break; 189 case MAT_KEEP_ZEROED_ROWS: 190 a->keepzeroedrows = PETSC_TRUE; 191 break; 192 case MAT_NO_NEW_NONZERO_LOCATIONS: 193 a->nonew = 1; 194 break; 195 case MAT_NEW_NONZERO_LOCATION_ERR: 196 a->nonew = -1; 197 break; 198 case MAT_NEW_NONZERO_ALLOCATION_ERR: 199 a->nonew = -2; 200 break; 201 case MAT_YES_NEW_NONZERO_LOCATIONS: 202 a->nonew = 0; 203 break; 204 case MAT_ROWS_SORTED: 205 case MAT_ROWS_UNSORTED: 206 case MAT_YES_NEW_DIAGONALS: 207 case MAT_IGNORE_OFF_PROC_ENTRIES: 208 case MAT_USE_HASH_TABLE: 209 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 210 break; 211 case MAT_NO_NEW_DIAGONALS: 212 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 213 break; 214 case MAT_USE_SINGLE_PRECISION_SOLVES: 215 if (a->bs==4) { 216 PetscTruth sse_enabled_local,sse_enabled_global; 217 int ierr; 218 219 sse_enabled_local = PETSC_FALSE; 220 sse_enabled_global = PETSC_FALSE; 221 222 ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr); 223 #if defined(PETSC_HAVE_SSE) 224 if (sse_enabled_local) { 225 a->single_precision_solves = PETSC_TRUE; 226 A->ops->solve = MatSolve_SeqBAIJ_Update; 227 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 228 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 229 break; 230 } else { 231 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 232 } 233 #else 234 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 235 #endif 236 } 237 break; 238 default: 239 SETERRQ(PETSC_ERR_SUP,"unknown option"); 240 } 241 PetscFunctionReturn(0); 242 } 243 244 #undef __FUNCT__ 245 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ" 246 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 247 { 248 PetscFunctionBegin; 249 if (m) *m = 0; 250 if (n) *n = A->m; 251 PetscFunctionReturn(0); 252 } 253 254 #undef __FUNCT__ 255 #define __FUNCT__ "MatGetRow_SeqBAIJ" 256 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 257 { 258 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 259 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 260 MatScalar *aa,*aa_i; 261 PetscScalar *v_i; 262 263 PetscFunctionBegin; 264 bs = a->bs; 265 ai = a->i; 266 aj = a->j; 267 aa = a->a; 268 bs2 = a->bs2; 269 270 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 271 272 bn = row/bs; /* Block number */ 273 bp = row % bs; /* Block Position */ 274 M = ai[bn+1] - ai[bn]; 275 *nz = bs*M; 276 277 if (v) { 278 *v = 0; 279 if (*nz) { 280 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 281 for (i=0; i<M; i++) { /* for each block in the block row */ 282 v_i = *v + i*bs; 283 aa_i = aa + bs2*(ai[bn] + i); 284 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 285 } 286 } 287 } 288 289 if (idx) { 290 *idx = 0; 291 if (*nz) { 292 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 293 for (i=0; i<M; i++) { /* for each block in the block row */ 294 idx_i = *idx + i*bs; 295 itmp = bs*aj[ai[bn] + i]; 296 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 297 } 298 } 299 } 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 305 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 306 { 307 int ierr; 308 309 PetscFunctionBegin; 310 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 311 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatTranspose_SeqBAIJ" 317 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 318 { 319 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 320 Mat C; 321 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 322 int *rows,*cols,bs2=a->bs2; 323 PetscScalar *array; 324 325 PetscFunctionBegin; 326 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 327 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 328 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 329 330 #if defined(PETSC_USE_MAT_SINGLE) 331 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 332 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 333 #else 334 array = a->a; 335 #endif 336 337 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 338 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 339 ierr = PetscFree(col);CHKERRQ(ierr); 340 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 341 cols = rows + bs; 342 for (i=0; i<mbs; i++) { 343 cols[0] = i*bs; 344 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 345 len = ai[i+1] - ai[i]; 346 for (j=0; j<len; j++) { 347 rows[0] = (*aj++)*bs; 348 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 349 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 350 array += bs2; 351 } 352 } 353 ierr = PetscFree(rows);CHKERRQ(ierr); 354 #if defined(PETSC_USE_MAT_SINGLE) 355 ierr = PetscFree(array); 356 #endif 357 358 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 359 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 360 361 if (B) { 362 *B = C; 363 } else { 364 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 365 } 366 PetscFunctionReturn(0); 367 } 368 369 #undef __FUNCT__ 370 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 371 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 372 { 373 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 374 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 375 PetscScalar *aa; 376 FILE *file; 377 378 PetscFunctionBegin; 379 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 380 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 381 col_lens[0] = MAT_COOKIE; 382 383 col_lens[1] = A->m; 384 col_lens[2] = A->n; 385 col_lens[3] = a->nz*bs2; 386 387 /* store lengths of each row and write (including header) to file */ 388 count = 0; 389 for (i=0; i<a->mbs; i++) { 390 for (j=0; j<bs; j++) { 391 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 392 } 393 } 394 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 395 ierr = PetscFree(col_lens);CHKERRQ(ierr); 396 397 /* store column indices (zero start index) */ 398 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 399 count = 0; 400 for (i=0; i<a->mbs; i++) { 401 for (j=0; j<bs; j++) { 402 for (k=a->i[i]; k<a->i[i+1]; k++) { 403 for (l=0; l<bs; l++) { 404 jj[count++] = bs*a->j[k] + l; 405 } 406 } 407 } 408 } 409 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 410 ierr = PetscFree(jj);CHKERRQ(ierr); 411 412 /* store nonzero values */ 413 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 414 count = 0; 415 for (i=0; i<a->mbs; i++) { 416 for (j=0; j<bs; j++) { 417 for (k=a->i[i]; k<a->i[i+1]; k++) { 418 for (l=0; l<bs; l++) { 419 aa[count++] = a->a[bs2*k + l*bs + j]; 420 } 421 } 422 } 423 } 424 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 425 ierr = PetscFree(aa);CHKERRQ(ierr); 426 427 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 428 if (file) { 429 fprintf(file,"-matload_block_size %d\n",a->bs); 430 } 431 PetscFunctionReturn(0); 432 } 433 434 #undef __FUNCT__ 435 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 436 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 437 { 438 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 439 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 440 PetscViewerFormat format; 441 442 PetscFunctionBegin; 443 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 444 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 445 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 446 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 447 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 448 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 449 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 450 for (i=0; i<a->mbs; i++) { 451 for (j=0; j<bs; j++) { 452 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 453 for (k=a->i[i]; k<a->i[i+1]; k++) { 454 for (l=0; l<bs; l++) { 455 #if defined(PETSC_USE_COMPLEX) 456 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 457 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l, 458 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 459 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 460 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l, 461 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 462 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 463 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 464 } 465 #else 466 if (a->a[bs2*k + l*bs + j] != 0.0) { 467 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 468 } 469 #endif 470 } 471 } 472 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 473 } 474 } 475 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 476 } else { 477 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 478 for (i=0; i<a->mbs; i++) { 479 for (j=0; j<bs; j++) { 480 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 481 for (k=a->i[i]; k<a->i[i+1]; k++) { 482 for (l=0; l<bs; l++) { 483 #if defined(PETSC_USE_COMPLEX) 484 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 485 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 486 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 487 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 488 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 489 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 490 } else { 491 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 492 } 493 #else 494 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 495 #endif 496 } 497 } 498 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 499 } 500 } 501 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 502 } 503 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 504 PetscFunctionReturn(0); 505 } 506 507 #undef __FUNCT__ 508 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 509 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 510 { 511 Mat A = (Mat) Aa; 512 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 513 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 514 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 515 MatScalar *aa; 516 PetscViewer viewer; 517 518 PetscFunctionBegin; 519 520 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 521 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 522 523 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 524 525 /* loop over matrix elements drawing boxes */ 526 color = PETSC_DRAW_BLUE; 527 for (i=0,row=0; i<mbs; i++,row+=bs) { 528 for (j=a->i[i]; j<a->i[i+1]; j++) { 529 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 530 x_l = a->j[j]*bs; x_r = x_l + 1.0; 531 aa = a->a + j*bs2; 532 for (k=0; k<bs; k++) { 533 for (l=0; l<bs; l++) { 534 if (PetscRealPart(*aa++) >= 0.) continue; 535 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 536 } 537 } 538 } 539 } 540 color = PETSC_DRAW_CYAN; 541 for (i=0,row=0; i<mbs; i++,row+=bs) { 542 for (j=a->i[i]; j<a->i[i+1]; j++) { 543 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 544 x_l = a->j[j]*bs; x_r = x_l + 1.0; 545 aa = a->a + j*bs2; 546 for (k=0; k<bs; k++) { 547 for (l=0; l<bs; l++) { 548 if (PetscRealPart(*aa++) != 0.) continue; 549 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 550 } 551 } 552 } 553 } 554 555 color = PETSC_DRAW_RED; 556 for (i=0,row=0; i<mbs; i++,row+=bs) { 557 for (j=a->i[i]; j<a->i[i+1]; j++) { 558 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 559 x_l = a->j[j]*bs; x_r = x_l + 1.0; 560 aa = a->a + j*bs2; 561 for (k=0; k<bs; k++) { 562 for (l=0; l<bs; l++) { 563 if (PetscRealPart(*aa++) <= 0.) continue; 564 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 565 } 566 } 567 } 568 } 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 574 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 575 { 576 int ierr; 577 PetscReal xl,yl,xr,yr,w,h; 578 PetscDraw draw; 579 PetscTruth isnull; 580 581 PetscFunctionBegin; 582 583 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 584 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 585 586 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 587 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 588 xr += w; yr += h; xl = -w; yl = -h; 589 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 590 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 591 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 592 PetscFunctionReturn(0); 593 } 594 595 #undef __FUNCT__ 596 #define __FUNCT__ "MatView_SeqBAIJ" 597 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 598 { 599 int ierr; 600 PetscTruth issocket,isascii,isbinary,isdraw; 601 602 PetscFunctionBegin; 603 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 604 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 605 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 606 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 607 if (issocket) { 608 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 609 } else if (isascii){ 610 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 611 } else if (isbinary) { 612 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 613 } else if (isdraw) { 614 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 615 } else { 616 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 617 } 618 PetscFunctionReturn(0); 619 } 620 621 622 #undef __FUNCT__ 623 #define __FUNCT__ "MatGetValues_SeqBAIJ" 624 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 625 { 626 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 627 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 628 int *ai = a->i,*ailen = a->ilen; 629 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 630 MatScalar *ap,*aa = a->a,zero = 0.0; 631 632 PetscFunctionBegin; 633 for (k=0; k<m; k++) { /* loop over rows */ 634 row = im[k]; brow = row/bs; 635 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 636 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 637 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 638 nrow = ailen[brow]; 639 for (l=0; l<n; l++) { /* loop over columns */ 640 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 641 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 642 col = in[l] ; 643 bcol = col/bs; 644 cidx = col%bs; 645 ridx = row%bs; 646 high = nrow; 647 low = 0; /* assume unsorted */ 648 while (high-low > 5) { 649 t = (low+high)/2; 650 if (rp[t] > bcol) high = t; 651 else low = t; 652 } 653 for (i=low; i<high; i++) { 654 if (rp[i] > bcol) break; 655 if (rp[i] == bcol) { 656 *v++ = ap[bs2*i+bs*cidx+ridx]; 657 goto finished; 658 } 659 } 660 *v++ = zero; 661 finished:; 662 } 663 } 664 PetscFunctionReturn(0); 665 } 666 667 #if defined(PETSC_USE_MAT_SINGLE) 668 #undef __FUNCT__ 669 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 670 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 671 { 672 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 673 int ierr,i,N = m*n*b->bs2; 674 MatScalar *vsingle; 675 676 PetscFunctionBegin; 677 if (N > b->setvalueslen) { 678 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 679 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 680 b->setvalueslen = N; 681 } 682 vsingle = b->setvaluescopy; 683 for (i=0; i<N; i++) { 684 vsingle[i] = v[i]; 685 } 686 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 #endif 690 691 692 #undef __FUNCT__ 693 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 694 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 695 { 696 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 697 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 698 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 699 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 700 PetscTruth roworiented=a->roworiented; 701 MatScalar *value = v,*ap,*aa = a->a,*bap; 702 703 PetscFunctionBegin; 704 if (roworiented) { 705 stepval = (n-1)*bs; 706 } else { 707 stepval = (m-1)*bs; 708 } 709 for (k=0; k<m; k++) { /* loop over added rows */ 710 row = im[k]; 711 if (row < 0) continue; 712 #if defined(PETSC_USE_BOPT_g) 713 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 714 #endif 715 rp = aj + ai[row]; 716 ap = aa + bs2*ai[row]; 717 rmax = imax[row]; 718 nrow = ailen[row]; 719 low = 0; 720 for (l=0; l<n; l++) { /* loop over added columns */ 721 if (in[l] < 0) continue; 722 #if defined(PETSC_USE_BOPT_g) 723 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 724 #endif 725 col = in[l]; 726 if (roworiented) { 727 value = v + k*(stepval+bs)*bs + l*bs; 728 } else { 729 value = v + l*(stepval+bs)*bs + k*bs; 730 } 731 if (!sorted) low = 0; high = nrow; 732 while (high-low > 7) { 733 t = (low+high)/2; 734 if (rp[t] > col) high = t; 735 else low = t; 736 } 737 for (i=low; i<high; i++) { 738 if (rp[i] > col) break; 739 if (rp[i] == col) { 740 bap = ap + bs2*i; 741 if (roworiented) { 742 if (is == ADD_VALUES) { 743 for (ii=0; ii<bs; ii++,value+=stepval) { 744 for (jj=ii; jj<bs2; jj+=bs) { 745 bap[jj] += *value++; 746 } 747 } 748 } else { 749 for (ii=0; ii<bs; ii++,value+=stepval) { 750 for (jj=ii; jj<bs2; jj+=bs) { 751 bap[jj] = *value++; 752 } 753 } 754 } 755 } else { 756 if (is == ADD_VALUES) { 757 for (ii=0; ii<bs; ii++,value+=stepval) { 758 for (jj=0; jj<bs; jj++) { 759 *bap++ += *value++; 760 } 761 } 762 } else { 763 for (ii=0; ii<bs; ii++,value+=stepval) { 764 for (jj=0; jj<bs; jj++) { 765 *bap++ = *value++; 766 } 767 } 768 } 769 } 770 goto noinsert2; 771 } 772 } 773 if (nonew == 1) goto noinsert2; 774 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 775 if (nrow >= rmax) { 776 /* there is no extra room in row, therefore enlarge */ 777 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 778 MatScalar *new_a; 779 780 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 781 782 /* malloc new storage space */ 783 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 784 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 785 new_j = (int*)(new_a + bs2*new_nz); 786 new_i = new_j + new_nz; 787 788 /* copy over old data into new slots */ 789 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 790 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 791 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 792 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 793 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 794 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 795 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 796 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 797 /* free up old matrix storage */ 798 ierr = PetscFree(a->a);CHKERRQ(ierr); 799 if (!a->singlemalloc) { 800 ierr = PetscFree(a->i);CHKERRQ(ierr); 801 ierr = PetscFree(a->j);CHKERRQ(ierr); 802 } 803 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 804 a->singlemalloc = PETSC_TRUE; 805 806 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 807 rmax = imax[row] = imax[row] + CHUNKSIZE; 808 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 809 a->maxnz += bs2*CHUNKSIZE; 810 a->reallocs++; 811 a->nz++; 812 } 813 N = nrow++ - 1; 814 /* shift up all the later entries in this row */ 815 for (ii=N; ii>=i; ii--) { 816 rp[ii+1] = rp[ii]; 817 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 818 } 819 if (N >= i) { 820 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 821 } 822 rp[i] = col; 823 bap = ap + bs2*i; 824 if (roworiented) { 825 for (ii=0; ii<bs; ii++,value+=stepval) { 826 for (jj=ii; jj<bs2; jj+=bs) { 827 bap[jj] = *value++; 828 } 829 } 830 } else { 831 for (ii=0; ii<bs; ii++,value+=stepval) { 832 for (jj=0; jj<bs; jj++) { 833 *bap++ = *value++; 834 } 835 } 836 } 837 noinsert2:; 838 low = i; 839 } 840 ailen[row] = nrow; 841 } 842 PetscFunctionReturn(0); 843 } 844 845 846 #undef __FUNCT__ 847 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 848 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 849 { 850 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 851 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 852 int m = A->m,*ip,N,*ailen = a->ilen; 853 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 854 MatScalar *aa = a->a,*ap; 855 856 PetscFunctionBegin; 857 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 858 859 if (m) rmax = ailen[0]; 860 for (i=1; i<mbs; i++) { 861 /* move each row back by the amount of empty slots (fshift) before it*/ 862 fshift += imax[i-1] - ailen[i-1]; 863 rmax = PetscMax(rmax,ailen[i]); 864 if (fshift) { 865 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 866 N = ailen[i]; 867 for (j=0; j<N; j++) { 868 ip[j-fshift] = ip[j]; 869 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 870 } 871 } 872 ai[i] = ai[i-1] + ailen[i-1]; 873 } 874 if (mbs) { 875 fshift += imax[mbs-1] - ailen[mbs-1]; 876 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 877 } 878 /* reset ilen and imax for each row */ 879 for (i=0; i<mbs; i++) { 880 ailen[i] = imax[i] = ai[i+1] - ai[i]; 881 } 882 a->nz = ai[mbs]; 883 884 /* diagonals may have moved, so kill the diagonal pointers */ 885 if (fshift && a->diag) { 886 ierr = PetscFree(a->diag);CHKERRQ(ierr); 887 PetscLogObjectMemory(A,-(m+1)*sizeof(int)); 888 a->diag = 0; 889 } 890 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 891 m,A->n,a->bs,fshift*bs2,a->nz*bs2); 892 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 893 a->reallocs); 894 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 895 a->reallocs = 0; 896 A->info.nz_unneeded = (PetscReal)fshift*bs2; 897 898 PetscFunctionReturn(0); 899 } 900 901 902 903 /* 904 This function returns an array of flags which indicate the locations of contiguous 905 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 906 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 907 Assume: sizes should be long enough to hold all the values. 908 */ 909 #undef __FUNCT__ 910 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 911 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 912 { 913 int i,j,k,row; 914 PetscTruth flg; 915 916 PetscFunctionBegin; 917 for (i=0,j=0; i<n; j++) { 918 row = idx[i]; 919 if (row%bs!=0) { /* Not the begining of a block */ 920 sizes[j] = 1; 921 i++; 922 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 923 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 924 i++; 925 } else { /* Begining of the block, so check if the complete block exists */ 926 flg = PETSC_TRUE; 927 for (k=1; k<bs; k++) { 928 if (row+k != idx[i+k]) { /* break in the block */ 929 flg = PETSC_FALSE; 930 break; 931 } 932 } 933 if (flg == PETSC_TRUE) { /* No break in the bs */ 934 sizes[j] = bs; 935 i+= bs; 936 } else { 937 sizes[j] = 1; 938 i++; 939 } 940 } 941 } 942 *bs_max = j; 943 PetscFunctionReturn(0); 944 } 945 946 #undef __FUNCT__ 947 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 948 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 949 { 950 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 951 int ierr,i,j,k,count,is_n,*is_idx,*rows; 952 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 953 PetscScalar zero = 0.0; 954 MatScalar *aa; 955 956 PetscFunctionBegin; 957 /* Make a copy of the IS and sort it */ 958 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 959 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 960 961 /* allocate memory for rows,sizes */ 962 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 963 sizes = rows + is_n; 964 965 /* copy IS values to rows, and sort them */ 966 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 967 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 968 if (baij->keepzeroedrows) { 969 for (i=0; i<is_n; i++) { sizes[i] = 1; } 970 bs_max = is_n; 971 } else { 972 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 973 } 974 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 975 976 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 977 row = rows[j]; 978 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 979 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 980 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 981 if (sizes[i] == bs && !baij->keepzeroedrows) { 982 if (diag) { 983 if (baij->ilen[row/bs] > 0) { 984 baij->ilen[row/bs] = 1; 985 baij->j[baij->i[row/bs]] = row/bs; 986 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 987 } 988 /* Now insert all the diagonal values for this bs */ 989 for (k=0; k<bs; k++) { 990 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 991 } 992 } else { /* (!diag) */ 993 baij->ilen[row/bs] = 0; 994 } /* end (!diag) */ 995 } else { /* (sizes[i] != bs) */ 996 #if defined (PETSC_USE_DEBUG) 997 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 998 #endif 999 for (k=0; k<count; k++) { 1000 aa[0] = zero; 1001 aa += bs; 1002 } 1003 if (diag) { 1004 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1005 } 1006 } 1007 } 1008 1009 ierr = PetscFree(rows);CHKERRQ(ierr); 1010 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1011 PetscFunctionReturn(0); 1012 } 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1016 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 1017 { 1018 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1019 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1020 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1021 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1022 int ridx,cidx,bs2=a->bs2,ierr; 1023 PetscTruth roworiented=a->roworiented; 1024 MatScalar *ap,value,*aa=a->a,*bap; 1025 1026 PetscFunctionBegin; 1027 for (k=0; k<m; k++) { /* loop over added rows */ 1028 row = im[k]; brow = row/bs; 1029 if (row < 0) continue; 1030 #if defined(PETSC_USE_BOPT_g) 1031 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1032 #endif 1033 rp = aj + ai[brow]; 1034 ap = aa + bs2*ai[brow]; 1035 rmax = imax[brow]; 1036 nrow = ailen[brow]; 1037 low = 0; 1038 for (l=0; l<n; l++) { /* loop over added columns */ 1039 if (in[l] < 0) continue; 1040 #if defined(PETSC_USE_BOPT_g) 1041 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1042 #endif 1043 col = in[l]; bcol = col/bs; 1044 ridx = row % bs; cidx = col % bs; 1045 if (roworiented) { 1046 value = v[l + k*n]; 1047 } else { 1048 value = v[k + l*m]; 1049 } 1050 if (!sorted) low = 0; high = nrow; 1051 while (high-low > 7) { 1052 t = (low+high)/2; 1053 if (rp[t] > bcol) high = t; 1054 else low = t; 1055 } 1056 for (i=low; i<high; i++) { 1057 if (rp[i] > bcol) break; 1058 if (rp[i] == bcol) { 1059 bap = ap + bs2*i + bs*cidx + ridx; 1060 if (is == ADD_VALUES) *bap += value; 1061 else *bap = value; 1062 goto noinsert1; 1063 } 1064 } 1065 if (nonew == 1) goto noinsert1; 1066 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1067 if (nrow >= rmax) { 1068 /* there is no extra room in row, therefore enlarge */ 1069 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1070 MatScalar *new_a; 1071 1072 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1073 1074 /* Malloc new storage space */ 1075 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1076 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1077 new_j = (int*)(new_a + bs2*new_nz); 1078 new_i = new_j + new_nz; 1079 1080 /* copy over old data into new slots */ 1081 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1082 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1083 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1084 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1085 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1086 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1087 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1088 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1089 /* free up old matrix storage */ 1090 ierr = PetscFree(a->a);CHKERRQ(ierr); 1091 if (!a->singlemalloc) { 1092 ierr = PetscFree(a->i);CHKERRQ(ierr); 1093 ierr = PetscFree(a->j);CHKERRQ(ierr); 1094 } 1095 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1096 a->singlemalloc = PETSC_TRUE; 1097 1098 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1099 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1100 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1101 a->maxnz += bs2*CHUNKSIZE; 1102 a->reallocs++; 1103 a->nz++; 1104 } 1105 N = nrow++ - 1; 1106 /* shift up all the later entries in this row */ 1107 for (ii=N; ii>=i; ii--) { 1108 rp[ii+1] = rp[ii]; 1109 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1110 } 1111 if (N>=i) { 1112 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1113 } 1114 rp[i] = bcol; 1115 ap[bs2*i + bs*cidx + ridx] = value; 1116 noinsert1:; 1117 low = i; 1118 } 1119 ailen[brow] = nrow; 1120 } 1121 PetscFunctionReturn(0); 1122 } 1123 1124 1125 #undef __FUNCT__ 1126 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1127 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1128 { 1129 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1130 Mat outA; 1131 int ierr; 1132 PetscTruth row_identity,col_identity; 1133 1134 PetscFunctionBegin; 1135 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1136 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1137 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1138 if (!row_identity || !col_identity) { 1139 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1140 } 1141 1142 outA = inA; 1143 inA->factor = FACTOR_LU; 1144 1145 if (!a->diag) { 1146 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1147 } 1148 1149 a->row = row; 1150 a->col = col; 1151 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1152 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1153 1154 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1155 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1156 PetscLogObjectParent(inA,a->icol); 1157 1158 /* 1159 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1160 for ILU(0) factorization with natural ordering 1161 */ 1162 if (a->bs < 8) { 1163 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1164 } else { 1165 if (!a->solve_work) { 1166 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1167 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1168 } 1169 } 1170 1171 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1172 1173 PetscFunctionReturn(0); 1174 } 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1177 int MatPrintHelp_SeqBAIJ(Mat A) 1178 { 1179 static PetscTruth called = PETSC_FALSE; 1180 MPI_Comm comm = A->comm; 1181 int ierr; 1182 1183 PetscFunctionBegin; 1184 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1185 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1186 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1187 PetscFunctionReturn(0); 1188 } 1189 1190 EXTERN_C_BEGIN 1191 #undef __FUNCT__ 1192 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1193 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1194 { 1195 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1196 int i,nz,n; 1197 1198 PetscFunctionBegin; 1199 nz = baij->maxnz; 1200 n = mat->n; 1201 for (i=0; i<nz; i++) { 1202 baij->j[i] = indices[i]; 1203 } 1204 baij->nz = nz; 1205 for (i=0; i<n; i++) { 1206 baij->ilen[i] = baij->imax[i]; 1207 } 1208 1209 PetscFunctionReturn(0); 1210 } 1211 EXTERN_C_END 1212 1213 #undef __FUNCT__ 1214 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1215 /*@ 1216 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1217 in the matrix. 1218 1219 Input Parameters: 1220 + mat - the SeqBAIJ matrix 1221 - indices - the column indices 1222 1223 Level: advanced 1224 1225 Notes: 1226 This can be called if you have precomputed the nonzero structure of the 1227 matrix and want to provide it to the matrix object to improve the performance 1228 of the MatSetValues() operation. 1229 1230 You MUST have set the correct numbers of nonzeros per row in the call to 1231 MatCreateSeqBAIJ(). 1232 1233 MUST be called before any calls to MatSetValues(); 1234 1235 @*/ 1236 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1237 { 1238 int ierr,(*f)(Mat,int *); 1239 1240 PetscFunctionBegin; 1241 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1242 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr); 1243 if (f) { 1244 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1245 } else { 1246 SETERRQ(1,"Wrong type of matrix to set column indices"); 1247 } 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1253 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1254 { 1255 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1256 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1257 PetscReal atmp; 1258 PetscScalar *x,zero = 0.0; 1259 MatScalar *aa; 1260 int ncols,brow,krow,kcol; 1261 1262 PetscFunctionBegin; 1263 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1264 bs = a->bs; 1265 aa = a->a; 1266 ai = a->i; 1267 aj = a->j; 1268 mbs = a->mbs; 1269 1270 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1271 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1272 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1273 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1274 for (i=0; i<mbs; i++) { 1275 ncols = ai[1] - ai[0]; ai++; 1276 brow = bs*i; 1277 for (j=0; j<ncols; j++){ 1278 /* bcol = bs*(*aj); */ 1279 for (kcol=0; kcol<bs; kcol++){ 1280 for (krow=0; krow<bs; krow++){ 1281 atmp = PetscAbsScalar(*aa); aa++; 1282 row = brow + krow; /* row index */ 1283 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1284 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1285 } 1286 } 1287 aj++; 1288 } 1289 } 1290 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1291 PetscFunctionReturn(0); 1292 } 1293 1294 #undef __FUNCT__ 1295 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1296 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1297 { 1298 int ierr; 1299 1300 PetscFunctionBegin; 1301 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1307 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1308 { 1309 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1310 PetscFunctionBegin; 1311 *array = a->a; 1312 PetscFunctionReturn(0); 1313 } 1314 1315 #undef __FUNCT__ 1316 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1317 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1318 { 1319 PetscFunctionBegin; 1320 PetscFunctionReturn(0); 1321 } 1322 1323 /* -------------------------------------------------------------------*/ 1324 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1325 MatGetRow_SeqBAIJ, 1326 MatRestoreRow_SeqBAIJ, 1327 MatMult_SeqBAIJ_N, 1328 MatMultAdd_SeqBAIJ_N, 1329 MatMultTranspose_SeqBAIJ, 1330 MatMultTransposeAdd_SeqBAIJ, 1331 MatSolve_SeqBAIJ_N, 1332 0, 1333 0, 1334 0, 1335 MatLUFactor_SeqBAIJ, 1336 0, 1337 0, 1338 MatTranspose_SeqBAIJ, 1339 MatGetInfo_SeqBAIJ, 1340 MatEqual_SeqBAIJ, 1341 MatGetDiagonal_SeqBAIJ, 1342 MatDiagonalScale_SeqBAIJ, 1343 MatNorm_SeqBAIJ, 1344 0, 1345 MatAssemblyEnd_SeqBAIJ, 1346 0, 1347 MatSetOption_SeqBAIJ, 1348 MatZeroEntries_SeqBAIJ, 1349 MatZeroRows_SeqBAIJ, 1350 MatLUFactorSymbolic_SeqBAIJ, 1351 MatLUFactorNumeric_SeqBAIJ_N, 1352 0, 1353 0, 1354 MatSetUpPreallocation_SeqBAIJ, 1355 0, 1356 MatGetOwnershipRange_SeqBAIJ, 1357 MatILUFactorSymbolic_SeqBAIJ, 1358 0, 1359 MatGetArray_SeqBAIJ, 1360 MatRestoreArray_SeqBAIJ, 1361 MatDuplicate_SeqBAIJ, 1362 0, 1363 0, 1364 MatILUFactor_SeqBAIJ, 1365 0, 1366 0, 1367 MatGetSubMatrices_SeqBAIJ, 1368 MatIncreaseOverlap_SeqBAIJ, 1369 MatGetValues_SeqBAIJ, 1370 0, 1371 MatPrintHelp_SeqBAIJ, 1372 MatScale_SeqBAIJ, 1373 0, 1374 0, 1375 0, 1376 MatGetBlockSize_SeqBAIJ, 1377 MatGetRowIJ_SeqBAIJ, 1378 MatRestoreRowIJ_SeqBAIJ, 1379 0, 1380 0, 1381 0, 1382 0, 1383 0, 1384 0, 1385 MatSetValuesBlocked_SeqBAIJ, 1386 MatGetSubMatrix_SeqBAIJ, 1387 MatDestroy_SeqBAIJ, 1388 MatView_SeqBAIJ, 1389 MatGetPetscMaps_Petsc, 1390 0, 1391 0, 1392 0, 1393 0, 1394 0, 1395 0, 1396 MatGetRowMax_SeqBAIJ, 1397 MatConvert_Basic}; 1398 1399 EXTERN_C_BEGIN 1400 #undef __FUNCT__ 1401 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1402 int MatStoreValues_SeqBAIJ(Mat mat) 1403 { 1404 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1405 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1406 int ierr; 1407 1408 PetscFunctionBegin; 1409 if (aij->nonew != 1) { 1410 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1411 } 1412 1413 /* allocate space for values if not already there */ 1414 if (!aij->saved_values) { 1415 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1416 } 1417 1418 /* copy values over */ 1419 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1420 PetscFunctionReturn(0); 1421 } 1422 EXTERN_C_END 1423 1424 EXTERN_C_BEGIN 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1427 int MatRetrieveValues_SeqBAIJ(Mat mat) 1428 { 1429 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1430 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1431 1432 PetscFunctionBegin; 1433 if (aij->nonew != 1) { 1434 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1435 } 1436 if (!aij->saved_values) { 1437 SETERRQ(1,"Must call MatStoreValues(A);first"); 1438 } 1439 1440 /* copy values over */ 1441 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1442 PetscFunctionReturn(0); 1443 } 1444 EXTERN_C_END 1445 1446 EXTERN_C_BEGIN 1447 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1448 EXTERN_C_END 1449 1450 EXTERN_C_BEGIN 1451 #undef __FUNCT__ 1452 #define __FUNCT__ "MatCreate_SeqBAIJ" 1453 int MatCreate_SeqBAIJ(Mat B) 1454 { 1455 int ierr,size; 1456 Mat_SeqBAIJ *b; 1457 1458 PetscFunctionBegin; 1459 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1460 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1461 1462 B->m = B->M = PetscMax(B->m,B->M); 1463 B->n = B->N = PetscMax(B->n,B->N); 1464 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1465 B->data = (void*)b; 1466 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1467 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1468 B->factor = 0; 1469 B->lupivotthreshold = 1.0; 1470 B->mapping = 0; 1471 b->row = 0; 1472 b->col = 0; 1473 b->icol = 0; 1474 b->reallocs = 0; 1475 b->saved_values = 0; 1476 #if defined(PETSC_USE_MAT_SINGLE) 1477 b->setvalueslen = 0; 1478 b->setvaluescopy = PETSC_NULL; 1479 #endif 1480 b->single_precision_solves = PETSC_FALSE; 1481 1482 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1483 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1484 1485 b->sorted = PETSC_FALSE; 1486 b->roworiented = PETSC_TRUE; 1487 b->nonew = 0; 1488 b->diag = 0; 1489 b->solve_work = 0; 1490 b->mult_work = 0; 1491 b->spptr = 0; 1492 B->info.nz_unneeded = (PetscReal)b->maxnz; 1493 b->keepzeroedrows = PETSC_FALSE; 1494 1495 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1496 "MatStoreValues_SeqBAIJ", 1497 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1498 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1499 "MatRetrieveValues_SeqBAIJ", 1500 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1501 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1502 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1503 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1504 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1505 "MatConvert_SeqBAIJ_SeqAIJ", 1506 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1507 PetscFunctionReturn(0); 1508 } 1509 EXTERN_C_END 1510 1511 #undef __FUNCT__ 1512 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1513 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1514 { 1515 Mat C; 1516 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1517 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1518 1519 PetscFunctionBegin; 1520 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1521 1522 *B = 0; 1523 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1524 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1525 c = (Mat_SeqBAIJ*)C->data; 1526 1527 c->bs = a->bs; 1528 c->bs2 = a->bs2; 1529 c->mbs = a->mbs; 1530 c->nbs = a->nbs; 1531 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1532 1533 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1534 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1535 for (i=0; i<mbs; i++) { 1536 c->imax[i] = a->imax[i]; 1537 c->ilen[i] = a->ilen[i]; 1538 } 1539 1540 /* allocate the matrix space */ 1541 c->singlemalloc = PETSC_TRUE; 1542 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1543 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1544 c->j = (int*)(c->a + nz*bs2); 1545 c->i = c->j + nz; 1546 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1547 if (mbs > 0) { 1548 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1549 if (cpvalues == MAT_COPY_VALUES) { 1550 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1551 } else { 1552 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1553 } 1554 } 1555 1556 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1557 c->sorted = a->sorted; 1558 c->roworiented = a->roworiented; 1559 c->nonew = a->nonew; 1560 1561 if (a->diag) { 1562 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1563 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1564 for (i=0; i<mbs; i++) { 1565 c->diag[i] = a->diag[i]; 1566 } 1567 } else c->diag = 0; 1568 c->nz = a->nz; 1569 c->maxnz = a->maxnz; 1570 c->solve_work = 0; 1571 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1572 c->mult_work = 0; 1573 C->preallocated = PETSC_TRUE; 1574 C->assembled = PETSC_TRUE; 1575 *B = C; 1576 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1577 PetscFunctionReturn(0); 1578 } 1579 1580 EXTERN_C_BEGIN 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "MatLoad_SeqBAIJ" 1583 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1584 { 1585 Mat_SeqBAIJ *a; 1586 Mat B; 1587 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1588 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1589 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1590 int *masked,nmask,tmp,bs2,ishift; 1591 PetscScalar *aa; 1592 MPI_Comm comm = ((PetscObject)viewer)->comm; 1593 1594 PetscFunctionBegin; 1595 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1596 bs2 = bs*bs; 1597 1598 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1599 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1600 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1601 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1602 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1603 M = header[1]; N = header[2]; nz = header[3]; 1604 1605 if (header[3] < 0) { 1606 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1607 } 1608 1609 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1610 1611 /* 1612 This code adds extra rows to make sure the number of rows is 1613 divisible by the blocksize 1614 */ 1615 mbs = M/bs; 1616 extra_rows = bs - M + bs*(mbs); 1617 if (extra_rows == bs) extra_rows = 0; 1618 else mbs++; 1619 if (extra_rows) { 1620 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1621 } 1622 1623 /* read in row lengths */ 1624 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1625 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1626 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1627 1628 /* read in column indices */ 1629 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1630 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1631 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1632 1633 /* loop over row lengths determining block row lengths */ 1634 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1635 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1636 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1637 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1638 masked = mask + mbs; 1639 rowcount = 0; nzcount = 0; 1640 for (i=0; i<mbs; i++) { 1641 nmask = 0; 1642 for (j=0; j<bs; j++) { 1643 kmax = rowlengths[rowcount]; 1644 for (k=0; k<kmax; k++) { 1645 tmp = jj[nzcount++]/bs; 1646 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1647 } 1648 rowcount++; 1649 } 1650 browlengths[i] += nmask; 1651 /* zero out the mask elements we set */ 1652 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1653 } 1654 1655 /* create our matrix */ 1656 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1657 B = *A; 1658 a = (Mat_SeqBAIJ*)B->data; 1659 1660 /* set matrix "i" values */ 1661 a->i[0] = 0; 1662 for (i=1; i<= mbs; i++) { 1663 a->i[i] = a->i[i-1] + browlengths[i-1]; 1664 a->ilen[i-1] = browlengths[i-1]; 1665 } 1666 a->nz = 0; 1667 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1668 1669 /* read in nonzero values */ 1670 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1671 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1672 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1673 1674 /* set "a" and "j" values into matrix */ 1675 nzcount = 0; jcount = 0; 1676 for (i=0; i<mbs; i++) { 1677 nzcountb = nzcount; 1678 nmask = 0; 1679 for (j=0; j<bs; j++) { 1680 kmax = rowlengths[i*bs+j]; 1681 for (k=0; k<kmax; k++) { 1682 tmp = jj[nzcount++]/bs; 1683 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1684 } 1685 } 1686 /* sort the masked values */ 1687 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1688 1689 /* set "j" values into matrix */ 1690 maskcount = 1; 1691 for (j=0; j<nmask; j++) { 1692 a->j[jcount++] = masked[j]; 1693 mask[masked[j]] = maskcount++; 1694 } 1695 /* set "a" values into matrix */ 1696 ishift = bs2*a->i[i]; 1697 for (j=0; j<bs; j++) { 1698 kmax = rowlengths[i*bs+j]; 1699 for (k=0; k<kmax; k++) { 1700 tmp = jj[nzcountb]/bs ; 1701 block = mask[tmp] - 1; 1702 point = jj[nzcountb] - bs*tmp; 1703 idx = ishift + bs2*block + j + bs*point; 1704 a->a[idx] = (MatScalar)aa[nzcountb++]; 1705 } 1706 } 1707 /* zero out the mask elements we set */ 1708 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1709 } 1710 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1711 1712 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1713 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1714 ierr = PetscFree(aa);CHKERRQ(ierr); 1715 ierr = PetscFree(jj);CHKERRQ(ierr); 1716 ierr = PetscFree(mask);CHKERRQ(ierr); 1717 1718 B->assembled = PETSC_TRUE; 1719 1720 ierr = MatView_Private(B);CHKERRQ(ierr); 1721 PetscFunctionReturn(0); 1722 } 1723 EXTERN_C_END 1724 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatCreateSeqBAIJ" 1727 /*@C 1728 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1729 compressed row) format. For good matrix assembly performance the 1730 user should preallocate the matrix storage by setting the parameter nz 1731 (or the array nnz). By setting these parameters accurately, performance 1732 during matrix assembly can be increased by more than a factor of 50. 1733 1734 Collective on MPI_Comm 1735 1736 Input Parameters: 1737 + comm - MPI communicator, set to PETSC_COMM_SELF 1738 . bs - size of block 1739 . m - number of rows 1740 . n - number of columns 1741 . nz - number of nonzero blocks per block row (same for all rows) 1742 - nnz - array containing the number of nonzero blocks in the various block rows 1743 (possibly different for each block row) or PETSC_NULL 1744 1745 Output Parameter: 1746 . A - the matrix 1747 1748 Options Database Keys: 1749 . -mat_no_unroll - uses code that does not unroll the loops in the 1750 block calculations (much slower) 1751 . -mat_block_size - size of the blocks to use 1752 1753 Level: intermediate 1754 1755 Notes: 1756 A nonzero block is any block that as 1 or more nonzeros in it 1757 1758 The block AIJ format is fully compatible with standard Fortran 77 1759 storage. That is, the stored row and column indices can begin at 1760 either one (as in Fortran) or zero. See the users' manual for details. 1761 1762 Specify the preallocated storage with either nz or nnz (not both). 1763 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1764 allocation. For additional details, see the users manual chapter on 1765 matrices. 1766 1767 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1768 @*/ 1769 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1770 { 1771 int ierr; 1772 1773 PetscFunctionBegin; 1774 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1775 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1776 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1777 PetscFunctionReturn(0); 1778 } 1779 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1782 /*@C 1783 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1784 per row in the matrix. For good matrix assembly performance the 1785 user should preallocate the matrix storage by setting the parameter nz 1786 (or the array nnz). By setting these parameters accurately, performance 1787 during matrix assembly can be increased by more than a factor of 50. 1788 1789 Collective on MPI_Comm 1790 1791 Input Parameters: 1792 + A - the matrix 1793 . bs - size of block 1794 . nz - number of block nonzeros per block row (same for all rows) 1795 - nnz - array containing the number of block nonzeros in the various block rows 1796 (possibly different for each block row) or PETSC_NULL 1797 1798 Options Database Keys: 1799 . -mat_no_unroll - uses code that does not unroll the loops in the 1800 block calculations (much slower) 1801 . -mat_block_size - size of the blocks to use 1802 1803 Level: intermediate 1804 1805 Notes: 1806 The block AIJ format is fully compatible with standard Fortran 77 1807 storage. That is, the stored row and column indices can begin at 1808 either one (as in Fortran) or zero. See the users' manual for details. 1809 1810 Specify the preallocated storage with either nz or nnz (not both). 1811 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1812 allocation. For additional details, see the users manual chapter on 1813 matrices. 1814 1815 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1816 @*/ 1817 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1818 { 1819 Mat_SeqBAIJ *b; 1820 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1821 PetscTruth flg; 1822 1823 PetscFunctionBegin; 1824 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1825 if (!flg) PetscFunctionReturn(0); 1826 1827 B->preallocated = PETSC_TRUE; 1828 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1829 if (nnz && newbs != bs) { 1830 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1831 } 1832 bs = newbs; 1833 1834 mbs = B->m/bs; 1835 nbs = B->n/bs; 1836 bs2 = bs*bs; 1837 1838 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1839 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1840 } 1841 1842 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1843 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1844 if (nnz) { 1845 for (i=0; i<mbs; i++) { 1846 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1847 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1848 } 1849 } 1850 1851 b = (Mat_SeqBAIJ*)B->data; 1852 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1853 B->ops->solve = MatSolve_SeqBAIJ_Update; 1854 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1855 if (!flg) { 1856 switch (bs) { 1857 case 1: 1858 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1859 B->ops->mult = MatMult_SeqBAIJ_1; 1860 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1861 break; 1862 case 2: 1863 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1864 B->ops->mult = MatMult_SeqBAIJ_2; 1865 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1866 break; 1867 case 3: 1868 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1869 B->ops->mult = MatMult_SeqBAIJ_3; 1870 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1871 break; 1872 case 4: 1873 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1874 B->ops->mult = MatMult_SeqBAIJ_4; 1875 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1876 break; 1877 case 5: 1878 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1879 B->ops->mult = MatMult_SeqBAIJ_5; 1880 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1881 break; 1882 case 6: 1883 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1884 B->ops->mult = MatMult_SeqBAIJ_6; 1885 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1886 break; 1887 case 7: 1888 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1889 B->ops->mult = MatMult_SeqBAIJ_7; 1890 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1891 break; 1892 default: 1893 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1894 B->ops->mult = MatMult_SeqBAIJ_N; 1895 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1896 break; 1897 } 1898 } 1899 b->bs = bs; 1900 b->mbs = mbs; 1901 b->nbs = nbs; 1902 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1903 if (!nnz) { 1904 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1905 else if (nz <= 0) nz = 1; 1906 for (i=0; i<mbs; i++) b->imax[i] = nz; 1907 nz = nz*mbs; 1908 } else { 1909 nz = 0; 1910 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1911 } 1912 1913 /* allocate the matrix space */ 1914 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1915 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1916 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1917 b->j = (int*)(b->a + nz*bs2); 1918 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1919 b->i = b->j + nz; 1920 b->singlemalloc = PETSC_TRUE; 1921 1922 b->i[0] = 0; 1923 for (i=1; i<mbs+1; i++) { 1924 b->i[i] = b->i[i-1] + b->imax[i-1]; 1925 } 1926 1927 /* b->ilen will count nonzeros in each block row so far. */ 1928 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1929 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1930 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1931 1932 b->bs = bs; 1933 b->bs2 = bs2; 1934 b->mbs = mbs; 1935 b->nz = 0; 1936 b->maxnz = nz*bs2; 1937 B->info.nz_unneeded = (PetscReal)b->maxnz; 1938 PetscFunctionReturn(0); 1939 } 1940 1941