1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: baij.c,v 1.185 1999/10/13 20:37:28 bsmith Exp bsmith $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 #include "sys.h" 10 #include "src/mat/impls/baij/seq/baij.h" 11 #include "src/vec/vecimpl.h" 12 #include "src/inline/spops.h" 13 14 #define CHUNKSIZE 10 15 16 /* 17 Checks for missing diagonals 18 */ 19 #undef __FUNC__ 20 #define __FUNC__ "MatMissingDiag_SeqBAIJ" 21 int MatMissingDiag_SeqBAIJ(Mat A) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 24 int *diag = a->diag, *jj = a->j,i; 25 26 PetscFunctionBegin; 27 for ( i=0; i<a->mbs; i++ ) { 28 if (jj[diag[i]] != i) { 29 SETERRQ1(1,1,"Matrix is missing diagonal number %d",i); 30 } 31 } 32 PetscFunctionReturn(0); 33 } 34 35 #undef __FUNC__ 36 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 37 int MatMarkDiag_SeqBAIJ(Mat A) 38 { 39 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 40 int i,j, *diag, m = a->mbs; 41 42 PetscFunctionBegin; 43 diag = (int *) PetscMalloc( (m+1)*sizeof(int));CHKPTRQ(diag); 44 PLogObjectMemory(A,(m+1)*sizeof(int)); 45 for ( i=0; i<m; i++ ) { 46 diag[i] = a->i[i+1]; 47 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 48 if (a->j[j] == i) { 49 diag[i] = j; 50 break; 51 } 52 } 53 } 54 a->diag = diag; 55 PetscFunctionReturn(0); 56 } 57 58 59 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 60 61 #undef __FUNC__ 62 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 63 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 64 PetscTruth *done) 65 { 66 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 67 int ierr, n = a->mbs,i; 68 69 PetscFunctionBegin; 70 *nn = n; 71 if (!ia) PetscFunctionReturn(0); 72 if (symmetric) { 73 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 74 } else if (oshift == 1) { 75 /* temporarily add 1 to i and j indices */ 76 int nz = a->i[n] + 1; 77 for ( i=0; i<nz; i++ ) a->j[i]++; 78 for ( i=0; i<n+1; i++ ) a->i[i]++; 79 *ia = a->i; *ja = a->j; 80 } else { 81 *ia = a->i; *ja = a->j; 82 } 83 84 PetscFunctionReturn(0); 85 } 86 87 #undef __FUNC__ 88 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 89 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 90 PetscTruth *done) 91 { 92 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 93 int i,n = a->mbs,ierr; 94 95 PetscFunctionBegin; 96 if (!ia) PetscFunctionReturn(0); 97 if (symmetric) { 98 ierr = PetscFree(*ia);CHKERRQ(ierr); 99 ierr = PetscFree(*ja);CHKERRQ(ierr); 100 } else if (oshift == 1) { 101 int nz = a->i[n]; 102 for ( i=0; i<nz; i++ ) a->j[i]--; 103 for ( i=0; i<n+1; i++ ) a->i[i]--; 104 } 105 PetscFunctionReturn(0); 106 } 107 108 #undef __FUNC__ 109 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 110 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 111 { 112 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 113 114 PetscFunctionBegin; 115 *bs = baij->bs; 116 PetscFunctionReturn(0); 117 } 118 119 120 #undef __FUNC__ 121 #define __FUNC__ "MatDestroy_SeqBAIJ" 122 int MatDestroy_SeqBAIJ(Mat A) 123 { 124 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 125 int ierr; 126 127 PetscFunctionBegin; 128 if (A->mapping) { 129 ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr); 130 } 131 if (A->bmapping) { 132 ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr); 133 } 134 if (A->rmap) { 135 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 136 } 137 if (A->cmap) { 138 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 139 } 140 #if defined(PETSC_USE_LOG) 141 PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 142 #endif 143 ierr = PetscFree(a->a);CHKERRQ(ierr); 144 if (!a->singlemalloc) { 145 ierr = PetscFree(a->i);CHKERRQ(ierr); 146 ierr = PetscFree(a->j);CHKERRQ(ierr); 147 } 148 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 149 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 150 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 151 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 152 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 153 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 154 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 155 ierr = PetscFree(a);CHKERRQ(ierr); 156 PLogObjectDestroy(A); 157 PetscHeaderDestroy(A); 158 PetscFunctionReturn(0); 159 } 160 161 #undef __FUNC__ 162 #define __FUNC__ "MatSetOption_SeqBAIJ" 163 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 164 { 165 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 166 167 PetscFunctionBegin; 168 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 169 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 170 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 171 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 172 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 173 else if (op == MAT_NEW_NONZERO_LOCATION_ERR) a->nonew = -1; 174 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR) a->nonew = -2; 175 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 176 else if (op == MAT_ROWS_SORTED || 177 op == MAT_ROWS_UNSORTED || 178 op == MAT_SYMMETRIC || 179 op == MAT_STRUCTURALLY_SYMMETRIC || 180 op == MAT_YES_NEW_DIAGONALS || 181 op == MAT_IGNORE_OFF_PROC_ENTRIES || 182 op == MAT_USE_HASH_TABLE) { 183 PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 184 } else if (op == MAT_NO_NEW_DIAGONALS) { 185 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 186 } else { 187 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 188 } 189 PetscFunctionReturn(0); 190 } 191 192 193 #undef __FUNC__ 194 #define __FUNC__ "MatGetSize_SeqBAIJ" 195 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 196 { 197 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 198 199 PetscFunctionBegin; 200 if (m) *m = a->m; 201 if (n) *n = a->n; 202 PetscFunctionReturn(0); 203 } 204 205 #undef __FUNC__ 206 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 207 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 208 { 209 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 210 211 PetscFunctionBegin; 212 *m = 0; *n = a->m; 213 PetscFunctionReturn(0); 214 } 215 216 #undef __FUNC__ 217 #define __FUNC__ "MatGetRow_SeqBAIJ" 218 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 219 { 220 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 221 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 222 MatScalar *aa,*aa_i; 223 Scalar *v_i; 224 225 PetscFunctionBegin; 226 bs = a->bs; 227 ai = a->i; 228 aj = a->j; 229 aa = a->a; 230 bs2 = a->bs2; 231 232 if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range"); 233 234 bn = row/bs; /* Block number */ 235 bp = row % bs; /* Block Position */ 236 M = ai[bn+1] - ai[bn]; 237 *nz = bs*M; 238 239 if (v) { 240 *v = 0; 241 if (*nz) { 242 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) );CHKPTRQ(*v); 243 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 244 v_i = *v + i*bs; 245 aa_i = aa + bs2*(ai[bn] + i); 246 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 247 } 248 } 249 } 250 251 if (idx) { 252 *idx = 0; 253 if (*nz) { 254 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) );CHKPTRQ(*idx); 255 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 256 idx_i = *idx + i*bs; 257 itmp = bs*aj[ai[bn] + i]; 258 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 259 } 260 } 261 } 262 PetscFunctionReturn(0); 263 } 264 265 #undef __FUNC__ 266 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 267 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 268 { 269 int ierr; 270 271 PetscFunctionBegin; 272 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 273 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNC__ 278 #define __FUNC__ "MatTranspose_SeqBAIJ" 279 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 280 { 281 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 282 Mat C; 283 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 284 int *rows,*cols,bs2=a->bs2; 285 MatScalar *array = a->a; 286 287 PetscFunctionBegin; 288 if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place"); 289 col = (int *) PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col); 290 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 291 292 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 293 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 294 ierr = PetscFree(col);CHKERRQ(ierr); 295 rows = (int *) PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows); 296 cols = rows + bs; 297 for ( i=0; i<mbs; i++ ) { 298 cols[0] = i*bs; 299 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 300 len = ai[i+1] - ai[i]; 301 for ( j=0; j<len; j++ ) { 302 rows[0] = (*aj++)*bs; 303 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 304 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 305 array += bs2; 306 } 307 } 308 ierr = PetscFree(rows);CHKERRQ(ierr); 309 310 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 311 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 312 313 if (B != PETSC_NULL) { 314 *B = C; 315 } else { 316 PetscOps *Abops; 317 MatOps Aops; 318 319 /* This isn't really an in-place transpose */ 320 ierr = PetscFree(a->a);CHKERRQ(ierr); 321 if (!a->singlemalloc) { 322 ierr = PetscFree(a->i);CHKERRQ(ierr); 323 ierr = PetscFree(a->j);CHKERRQ(ierr); 324 } 325 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 326 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 327 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 328 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 329 ierr = PetscFree(a);CHKERRQ(ierr); 330 331 332 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 333 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 334 335 /* 336 This is horrible, horrible code. We need to keep the 337 A pointers for the bops and ops but copy everything 338 else from C. 339 */ 340 Abops = A->bops; 341 Aops = A->ops; 342 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 343 A->bops = Abops; 344 A->ops = Aops; 345 A->qlist = 0; 346 347 PetscHeaderDestroy(C); 348 } 349 PetscFunctionReturn(0); 350 } 351 352 #undef __FUNC__ 353 #define __FUNC__ "MatView_SeqBAIJ_Binary" 354 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 355 { 356 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 357 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 358 Scalar *aa; 359 FILE *file; 360 361 PetscFunctionBegin; 362 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 363 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 364 col_lens[0] = MAT_COOKIE; 365 366 col_lens[1] = a->m; 367 col_lens[2] = a->n; 368 col_lens[3] = a->nz*bs2; 369 370 /* store lengths of each row and write (including header) to file */ 371 count = 0; 372 for ( i=0; i<a->mbs; i++ ) { 373 for ( j=0; j<bs; j++ ) { 374 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 375 } 376 } 377 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr); 378 ierr = PetscFree(col_lens);CHKERRQ(ierr); 379 380 /* store column indices (zero start index) */ 381 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) );CHKPTRQ(jj); 382 count = 0; 383 for ( i=0; i<a->mbs; i++ ) { 384 for ( j=0; j<bs; j++ ) { 385 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 386 for ( l=0; l<bs; l++ ) { 387 jj[count++] = bs*a->j[k] + l; 388 } 389 } 390 } 391 } 392 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 393 ierr = PetscFree(jj);CHKERRQ(ierr); 394 395 /* store nonzero values */ 396 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa); 397 count = 0; 398 for ( i=0; i<a->mbs; i++ ) { 399 for ( j=0; j<bs; j++ ) { 400 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 401 for ( l=0; l<bs; l++ ) { 402 aa[count++] = a->a[bs2*k + l*bs + j]; 403 } 404 } 405 } 406 } 407 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 408 ierr = PetscFree(aa);CHKERRQ(ierr); 409 410 ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 411 if (file) { 412 fprintf(file,"-matload_block_size %d\n",a->bs); 413 } 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNC__ 418 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 419 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 420 { 421 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 422 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 423 char *outputname; 424 425 PetscFunctionBegin; 426 ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr); 427 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 428 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 429 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 430 } else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 431 SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported"); 432 } else if (format == VIEWER_FORMAT_ASCII_COMMON) { 433 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 434 for ( i=0; i<a->mbs; i++ ) { 435 for ( j=0; j<bs; j++ ) { 436 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 437 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 438 for ( l=0; l<bs; l++ ) { 439 #if defined(PETSC_USE_COMPLEX) 440 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 441 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 442 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 443 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 444 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 445 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 446 } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) { 447 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 448 } 449 #else 450 if (a->a[bs2*k + l*bs + j] != 0.0) { 451 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 452 } 453 #endif 454 } 455 } 456 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 457 } 458 } 459 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 460 } else { 461 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 462 for ( i=0; i<a->mbs; i++ ) { 463 for ( j=0; j<bs; j++ ) { 464 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 465 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 466 for ( l=0; l<bs; l++ ) { 467 #if defined(PETSC_USE_COMPLEX) 468 if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) { 469 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 470 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 471 } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) { 472 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 473 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 474 } else { 475 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 476 } 477 #else 478 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 479 #endif 480 } 481 } 482 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 483 } 484 } 485 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 486 } 487 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 #undef __FUNC__ 492 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom" 493 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa) 494 { 495 Mat A = (Mat) Aa; 496 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 497 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 498 double xl,yl,xr,yr,x_l,x_r,y_l,y_r; 499 MatScalar *aa; 500 MPI_Comm comm; 501 Viewer viewer; 502 503 PetscFunctionBegin; 504 /* 505 This is nasty. If this is called from an originally parallel matrix 506 then all processes call this, but only the first has the matrix so the 507 rest should return immediately. 508 */ 509 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 510 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 511 if (rank) PetscFunctionReturn(0); 512 513 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr); 514 515 ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 516 517 /* loop over matrix elements drawing boxes */ 518 color = DRAW_BLUE; 519 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 520 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 521 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 522 x_l = a->j[j]*bs; x_r = x_l + 1.0; 523 aa = a->a + j*bs2; 524 for ( k=0; k<bs; k++ ) { 525 for ( l=0; l<bs; l++ ) { 526 if (PetscReal(*aa++) >= 0.) continue; 527 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 528 } 529 } 530 } 531 } 532 color = DRAW_CYAN; 533 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 534 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 535 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 536 x_l = a->j[j]*bs; x_r = x_l + 1.0; 537 aa = a->a + j*bs2; 538 for ( k=0; k<bs; k++ ) { 539 for ( l=0; l<bs; l++ ) { 540 if (PetscReal(*aa++) != 0.) continue; 541 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 542 } 543 } 544 } 545 } 546 547 color = DRAW_RED; 548 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 549 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 550 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 551 x_l = a->j[j]*bs; x_r = x_l + 1.0; 552 aa = a->a + j*bs2; 553 for ( k=0; k<bs; k++ ) { 554 for ( l=0; l<bs; l++ ) { 555 if (PetscReal(*aa++) <= 0.) continue; 556 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 557 } 558 } 559 } 560 } 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNC__ 565 #define __FUNC__ "MatView_SeqBAIJ_Draw" 566 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 567 { 568 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 569 int ierr; 570 double xl,yl,xr,yr,w,h; 571 Draw draw; 572 PetscTruth isnull; 573 574 PetscFunctionBegin; 575 576 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 577 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 578 579 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 580 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 581 xr += w; yr += h; xl = -w; yl = -h; 582 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 583 ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 584 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 585 PetscFunctionReturn(0); 586 } 587 588 #undef __FUNC__ 589 #define __FUNC__ "MatView_SeqBAIJ" 590 int MatView_SeqBAIJ(Mat A,Viewer viewer) 591 { 592 int ierr; 593 PetscTruth issocket,isascii,isbinary,isdraw; 594 595 PetscFunctionBegin; 596 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 597 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 598 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 599 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 600 if (issocket) { 601 SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported"); 602 } else if (isascii){ 603 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 604 } else if (isbinary) { 605 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 606 } else if (isdraw) { 607 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 608 } else { 609 SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 610 } 611 PetscFunctionReturn(0); 612 } 613 614 615 #undef __FUNC__ 616 #define __FUNC__ "MatGetValues_SeqBAIJ" 617 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 618 { 619 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 620 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 621 int *ai = a->i, *ailen = a->ilen; 622 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 623 MatScalar *ap, *aa = a->a, zero = 0.0; 624 625 PetscFunctionBegin; 626 for ( k=0; k<m; k++ ) { /* loop over rows */ 627 row = im[k]; brow = row/bs; 628 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 629 if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 630 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 631 nrow = ailen[brow]; 632 for ( l=0; l<n; l++ ) { /* loop over columns */ 633 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 634 if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 635 col = in[l] ; 636 bcol = col/bs; 637 cidx = col%bs; 638 ridx = row%bs; 639 high = nrow; 640 low = 0; /* assume unsorted */ 641 while (high-low > 5) { 642 t = (low+high)/2; 643 if (rp[t] > bcol) high = t; 644 else low = t; 645 } 646 for ( i=low; i<high; i++ ) { 647 if (rp[i] > bcol) break; 648 if (rp[i] == bcol) { 649 *v++ = ap[bs2*i+bs*cidx+ridx]; 650 goto finished; 651 } 652 } 653 *v++ = zero; 654 finished:; 655 } 656 } 657 PetscFunctionReturn(0); 658 } 659 660 661 #undef __FUNC__ 662 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 663 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 664 { 665 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 666 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 667 int *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented; 668 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 669 Scalar *value = v; 670 MatScalar *ap,*aa=a->a,*bap; 671 672 PetscFunctionBegin; 673 if (roworiented) { 674 stepval = (n-1)*bs; 675 } else { 676 stepval = (m-1)*bs; 677 } 678 for ( k=0; k<m; k++ ) { /* loop over added rows */ 679 row = im[k]; 680 if (row < 0) continue; 681 #if defined(PETSC_USE_BOPT_g) 682 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 683 #endif 684 rp = aj + ai[row]; 685 ap = aa + bs2*ai[row]; 686 rmax = imax[row]; 687 nrow = ailen[row]; 688 low = 0; 689 for ( l=0; l<n; l++ ) { /* loop over added columns */ 690 if (in[l] < 0) continue; 691 #if defined(PETSC_USE_BOPT_g) 692 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 693 #endif 694 col = in[l]; 695 if (roworiented) { 696 value = v + k*(stepval+bs)*bs + l*bs; 697 } else { 698 value = v + l*(stepval+bs)*bs + k*bs; 699 } 700 if (!sorted) low = 0; high = nrow; 701 while (high-low > 7) { 702 t = (low+high)/2; 703 if (rp[t] > col) high = t; 704 else low = t; 705 } 706 for ( i=low; i<high; i++ ) { 707 if (rp[i] > col) break; 708 if (rp[i] == col) { 709 bap = ap + bs2*i; 710 if (roworiented) { 711 if (is == ADD_VALUES) { 712 for ( ii=0; ii<bs; ii++,value+=stepval ) { 713 for (jj=ii; jj<bs2; jj+=bs ) { 714 bap[jj] += *value++; 715 } 716 } 717 } else { 718 for ( ii=0; ii<bs; ii++,value+=stepval ) { 719 for (jj=ii; jj<bs2; jj+=bs ) { 720 bap[jj] = *value++; 721 } 722 } 723 } 724 } else { 725 if (is == ADD_VALUES) { 726 for ( ii=0; ii<bs; ii++,value+=stepval ) { 727 for (jj=0; jj<bs; jj++ ) { 728 *bap++ += *value++; 729 } 730 } 731 } else { 732 for ( ii=0; ii<bs; ii++,value+=stepval ) { 733 for (jj=0; jj<bs; jj++ ) { 734 *bap++ = *value++; 735 } 736 } 737 } 738 } 739 goto noinsert2; 740 } 741 } 742 if (nonew == 1) goto noinsert2; 743 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 744 if (nrow >= rmax) { 745 /* there is no extra room in row, therefore enlarge */ 746 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 747 MatScalar *new_a; 748 749 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 750 751 /* malloc new storage space */ 752 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 753 new_a = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a); 754 new_j = (int *) (new_a + bs2*new_nz); 755 new_i = new_j + new_nz; 756 757 /* copy over old data into new slots */ 758 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 759 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 760 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 761 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 762 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 763 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 764 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 765 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 766 /* free up old matrix storage */ 767 ierr = PetscFree(a->a);CHKERRQ(ierr); 768 if (!a->singlemalloc) { 769 ierr = PetscFree(a->i);CHKERRQ(ierr); 770 ierr = PetscFree(a->j);CHKERRQ(ierr); 771 } 772 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 773 a->singlemalloc = 1; 774 775 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 776 rmax = imax[row] = imax[row] + CHUNKSIZE; 777 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 778 a->maxnz += bs2*CHUNKSIZE; 779 a->reallocs++; 780 a->nz++; 781 } 782 N = nrow++ - 1; 783 /* shift up all the later entries in this row */ 784 for ( ii=N; ii>=i; ii-- ) { 785 rp[ii+1] = rp[ii]; 786 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 787 } 788 if (N >= i) { 789 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 790 } 791 rp[i] = col; 792 bap = ap + bs2*i; 793 if (roworiented) { 794 for ( ii=0; ii<bs; ii++,value+=stepval) { 795 for (jj=ii; jj<bs2; jj+=bs ) { 796 bap[jj] = *value++; 797 } 798 } 799 } else { 800 for ( ii=0; ii<bs; ii++,value+=stepval ) { 801 for (jj=0; jj<bs; jj++ ) { 802 *bap++ = *value++; 803 } 804 } 805 } 806 noinsert2:; 807 low = i; 808 } 809 ailen[row] = nrow; 810 } 811 PetscFunctionReturn(0); 812 } 813 814 815 #undef __FUNC__ 816 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 817 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 818 { 819 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 820 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 821 int m = a->m,*ip, N, *ailen = a->ilen; 822 int mbs = a->mbs, bs2 = a->bs2,rmax = 0,ierr; 823 MatScalar *aa = a->a, *ap; 824 825 PetscFunctionBegin; 826 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 827 828 if (m) rmax = ailen[0]; 829 for ( i=1; i<mbs; i++ ) { 830 /* move each row back by the amount of empty slots (fshift) before it*/ 831 fshift += imax[i-1] - ailen[i-1]; 832 rmax = PetscMax(rmax,ailen[i]); 833 if (fshift) { 834 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 835 N = ailen[i]; 836 for ( j=0; j<N; j++ ) { 837 ip[j-fshift] = ip[j]; 838 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 839 } 840 } 841 ai[i] = ai[i-1] + ailen[i-1]; 842 } 843 if (mbs) { 844 fshift += imax[mbs-1] - ailen[mbs-1]; 845 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 846 } 847 /* reset ilen and imax for each row */ 848 for ( i=0; i<mbs; i++ ) { 849 ailen[i] = imax[i] = ai[i+1] - ai[i]; 850 } 851 a->nz = ai[mbs]; 852 853 /* diagonals may have moved, so kill the diagonal pointers */ 854 if (fshift && a->diag) { 855 ierr = PetscFree(a->diag);CHKERRQ(ierr); 856 PLogObjectMemory(A,-(m+1)*sizeof(int)); 857 a->diag = 0; 858 } 859 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 860 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 861 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 862 a->reallocs); 863 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 864 a->reallocs = 0; 865 A->info.nz_unneeded = (double)fshift*bs2; 866 867 PetscFunctionReturn(0); 868 } 869 870 871 872 /* 873 This function returns an array of flags which indicate the locations of contiguous 874 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 875 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 876 Assume: sizes should be long enough to hold all the values. 877 */ 878 #undef __FUNC__ 879 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks" 880 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 881 { 882 int i,j,k,row; 883 PetscTruth flg; 884 885 PetscFunctionBegin; 886 for ( i=0,j=0; i<n; j++ ) { 887 row = idx[i]; 888 if (row%bs!=0) { /* Not the begining of a block */ 889 sizes[j] = 1; 890 i++; 891 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 892 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 893 i++; 894 } else { /* Begining of the block, so check if the complete block exists */ 895 flg = PETSC_TRUE; 896 for ( k=1; k<bs; k++ ) { 897 if (row+k != idx[i+k]) { /* break in the block */ 898 flg = PETSC_FALSE; 899 break; 900 } 901 } 902 if (flg == PETSC_TRUE) { /* No break in the bs */ 903 sizes[j] = bs; 904 i+= bs; 905 } else { 906 sizes[j] = 1; 907 i++; 908 } 909 } 910 } 911 *bs_max = j; 912 PetscFunctionReturn(0); 913 } 914 915 #undef __FUNC__ 916 #define __FUNC__ "MatZeroRows_SeqBAIJ" 917 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 918 { 919 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 920 int ierr,i,j,k,count,is_n,*is_idx,*rows; 921 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 922 Scalar zero = 0.0; 923 MatScalar *aa; 924 925 PetscFunctionBegin; 926 /* Make a copy of the IS and sort it */ 927 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 928 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 929 930 /* allocate memory for rows,sizes */ 931 rows = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows); 932 sizes = rows + is_n; 933 934 /* initialize copy IS valurs to rows, and sort them */ 935 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 936 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 937 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 938 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 939 940 for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) { 941 row = rows[j]; 942 if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row); 943 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 944 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 945 if (sizes[i] == bs) { 946 if (diag) { 947 if (baij->ilen[row/bs] > 0) { 948 baij->ilen[row/bs] = 1; 949 baij->j[baij->i[row/bs]] = row/bs; 950 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 951 } 952 /* Now insert all the diagoanl values for this bs */ 953 for ( k=0; k<bs; k++ ) { 954 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 955 } 956 } else { /* (!diag) */ 957 baij->ilen[row/bs] = 0; 958 } /* end (!diag) */ 959 } else { /* (sizes[i] != bs) */ 960 #if defined (PETSC_USE_DEBUG) 961 if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1"); 962 #endif 963 for ( k=0; k<count; k++ ) { 964 aa[0] = zero; 965 aa+=bs; 966 } 967 if (diag) { 968 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 969 } 970 } 971 } 972 973 ierr = PetscFree(rows);CHKERRQ(ierr); 974 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 975 PetscFunctionReturn(0); 976 } 977 978 #undef __FUNC__ 979 #define __FUNC__ "MatSetValues_SeqBAIJ" 980 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 981 { 982 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 983 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 984 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 985 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 986 int ridx,cidx,bs2=a->bs2,ierr; 987 MatScalar *ap,value,*aa=a->a,*bap; 988 989 PetscFunctionBegin; 990 for ( k=0; k<m; k++ ) { /* loop over added rows */ 991 row = im[k]; brow = row/bs; 992 if (row < 0) continue; 993 #if defined(PETSC_USE_BOPT_g) 994 if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m); 995 #endif 996 rp = aj + ai[brow]; 997 ap = aa + bs2*ai[brow]; 998 rmax = imax[brow]; 999 nrow = ailen[brow]; 1000 low = 0; 1001 for ( l=0; l<n; l++ ) { /* loop over added columns */ 1002 if (in[l] < 0) continue; 1003 #if defined(PETSC_USE_BOPT_g) 1004 if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n); 1005 #endif 1006 col = in[l]; bcol = col/bs; 1007 ridx = row % bs; cidx = col % bs; 1008 if (roworiented) { 1009 value = v[l + k*n]; 1010 } else { 1011 value = v[k + l*m]; 1012 } 1013 if (!sorted) low = 0; high = nrow; 1014 while (high-low > 7) { 1015 t = (low+high)/2; 1016 if (rp[t] > bcol) high = t; 1017 else low = t; 1018 } 1019 for ( i=low; i<high; i++ ) { 1020 if (rp[i] > bcol) break; 1021 if (rp[i] == bcol) { 1022 bap = ap + bs2*i + bs*cidx + ridx; 1023 if (is == ADD_VALUES) *bap += value; 1024 else *bap = value; 1025 goto noinsert1; 1026 } 1027 } 1028 if (nonew == 1) goto noinsert1; 1029 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1030 if (nrow >= rmax) { 1031 /* there is no extra room in row, therefore enlarge */ 1032 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1033 MatScalar *new_a; 1034 1035 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1036 1037 /* Malloc new storage space */ 1038 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1039 new_a = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a); 1040 new_j = (int *) (new_a + bs2*new_nz); 1041 new_i = new_j + new_nz; 1042 1043 /* copy over old data into new slots */ 1044 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 1045 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1046 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1047 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1048 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1049 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1050 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1051 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1052 /* free up old matrix storage */ 1053 ierr = PetscFree(a->a);CHKERRQ(ierr); 1054 if (!a->singlemalloc) { 1055 ierr = PetscFree(a->i);CHKERRQ(ierr); 1056 ierr = PetscFree(a->j);CHKERRQ(ierr); 1057 } 1058 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1059 a->singlemalloc = 1; 1060 1061 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1062 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1063 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1064 a->maxnz += bs2*CHUNKSIZE; 1065 a->reallocs++; 1066 a->nz++; 1067 } 1068 N = nrow++ - 1; 1069 /* shift up all the later entries in this row */ 1070 for ( ii=N; ii>=i; ii-- ) { 1071 rp[ii+1] = rp[ii]; 1072 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1073 } 1074 if (N>=i) { 1075 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1076 } 1077 rp[i] = bcol; 1078 ap[bs2*i + bs*cidx + ridx] = value; 1079 noinsert1:; 1080 low = i; 1081 } 1082 ailen[brow] = nrow; 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1088 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1089 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1090 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1091 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1092 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec); 1093 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 1094 extern int MatScale_SeqBAIJ(Scalar*,Mat); 1095 extern int MatNorm_SeqBAIJ(Mat,NormType,double *); 1096 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*); 1097 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec); 1098 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 1099 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 1100 extern int MatZeroEntries_SeqBAIJ(Mat); 1101 1102 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1103 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1104 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1105 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1106 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1107 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1108 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 1109 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1110 1111 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1112 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1113 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1114 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1115 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1116 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1117 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*); 1118 1119 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec); 1120 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec); 1121 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec); 1122 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec); 1123 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec); 1124 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec); 1125 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec); 1126 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec); 1127 1128 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 1129 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 1130 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 1131 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 1132 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 1133 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 1134 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 1135 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 1136 1137 #undef __FUNC__ 1138 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1139 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1140 { 1141 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1142 Mat outA; 1143 int ierr; 1144 PetscTruth row_identity, col_identity; 1145 1146 PetscFunctionBegin; 1147 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU"); 1148 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1149 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1150 if (!row_identity || !col_identity) { 1151 SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU"); 1152 } 1153 1154 outA = inA; 1155 inA->factor = FACTOR_LU; 1156 a->row = row; 1157 a->col = col; 1158 1159 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1160 ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr); 1161 PLogObjectParent(inA,a->icol); 1162 1163 if (!a->solve_work) { 1164 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1165 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1166 } 1167 1168 if (!a->diag) { 1169 ierr = MatMarkDiag_SeqBAIJ(inA);CHKERRQ(ierr); 1170 } 1171 /* 1172 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1173 for ILU(0) factorization with natural ordering 1174 */ 1175 switch (a->bs) { 1176 case 2: 1177 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1178 inA->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1179 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1180 break; 1181 case 3: 1182 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1183 inA->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1184 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1185 break; 1186 case 4: 1187 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1188 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1189 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1190 break; 1191 case 5: 1192 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1193 inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1194 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1195 break; 1196 case 6: 1197 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1198 inA->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1199 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1200 break; 1201 case 7: 1202 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1203 inA->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1204 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1205 break; 1206 } 1207 1208 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1209 1210 PetscFunctionReturn(0); 1211 } 1212 #undef __FUNC__ 1213 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1214 int MatPrintHelp_SeqBAIJ(Mat A) 1215 { 1216 static int called = 0; 1217 MPI_Comm comm = A->comm; 1218 int ierr; 1219 1220 PetscFunctionBegin; 1221 if (called) {PetscFunctionReturn(0);} else called = 1; 1222 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1223 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 EXTERN_C_BEGIN 1228 #undef __FUNC__ 1229 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1230 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1231 { 1232 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1233 int i,nz,n; 1234 1235 PetscFunctionBegin; 1236 nz = baij->maxnz; 1237 n = baij->n; 1238 for (i=0; i<nz; i++) { 1239 baij->j[i] = indices[i]; 1240 } 1241 baij->nz = nz; 1242 for ( i=0; i<n; i++ ) { 1243 baij->ilen[i] = baij->imax[i]; 1244 } 1245 1246 PetscFunctionReturn(0); 1247 } 1248 EXTERN_C_END 1249 1250 #undef __FUNC__ 1251 #define __FUNC__ "MatSeqBAIJSetColumnIndices" 1252 /*@ 1253 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1254 in the matrix. 1255 1256 Input Parameters: 1257 + mat - the SeqBAIJ matrix 1258 - indices - the column indices 1259 1260 Level: advanced 1261 1262 Notes: 1263 This can be called if you have precomputed the nonzero structure of the 1264 matrix and want to provide it to the matrix object to improve the performance 1265 of the MatSetValues() operation. 1266 1267 You MUST have set the correct numbers of nonzeros per row in the call to 1268 MatCreateSeqBAIJ(). 1269 1270 MUST be called before any calls to MatSetValues(); 1271 1272 @*/ 1273 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1274 { 1275 int ierr,(*f)(Mat,int *); 1276 1277 PetscFunctionBegin; 1278 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1279 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr); 1280 if (f) { 1281 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1282 } else { 1283 SETERRQ(1,1,"Wrong type of matrix to set column indices"); 1284 } 1285 PetscFunctionReturn(0); 1286 } 1287 1288 /* -------------------------------------------------------------------*/ 1289 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1290 MatGetRow_SeqBAIJ, 1291 MatRestoreRow_SeqBAIJ, 1292 MatMult_SeqBAIJ_N, 1293 MatMultAdd_SeqBAIJ_N, 1294 MatMultTrans_SeqBAIJ, 1295 MatMultTransAdd_SeqBAIJ, 1296 MatSolve_SeqBAIJ_N, 1297 0, 1298 0, 1299 0, 1300 MatLUFactor_SeqBAIJ, 1301 0, 1302 0, 1303 MatTranspose_SeqBAIJ, 1304 MatGetInfo_SeqBAIJ, 1305 MatEqual_SeqBAIJ, 1306 MatGetDiagonal_SeqBAIJ, 1307 MatDiagonalScale_SeqBAIJ, 1308 MatNorm_SeqBAIJ, 1309 0, 1310 MatAssemblyEnd_SeqBAIJ, 1311 0, 1312 MatSetOption_SeqBAIJ, 1313 MatZeroEntries_SeqBAIJ, 1314 MatZeroRows_SeqBAIJ, 1315 MatLUFactorSymbolic_SeqBAIJ, 1316 MatLUFactorNumeric_SeqBAIJ_N, 1317 0, 1318 0, 1319 MatGetSize_SeqBAIJ, 1320 MatGetSize_SeqBAIJ, 1321 MatGetOwnershipRange_SeqBAIJ, 1322 MatILUFactorSymbolic_SeqBAIJ, 1323 0, 1324 0, 1325 0, 1326 MatDuplicate_SeqBAIJ, 1327 0, 1328 0, 1329 MatILUFactor_SeqBAIJ, 1330 0, 1331 0, 1332 MatGetSubMatrices_SeqBAIJ, 1333 MatIncreaseOverlap_SeqBAIJ, 1334 MatGetValues_SeqBAIJ, 1335 0, 1336 MatPrintHelp_SeqBAIJ, 1337 MatScale_SeqBAIJ, 1338 0, 1339 0, 1340 0, 1341 MatGetBlockSize_SeqBAIJ, 1342 MatGetRowIJ_SeqBAIJ, 1343 MatRestoreRowIJ_SeqBAIJ, 1344 0, 1345 0, 1346 0, 1347 0, 1348 0, 1349 0, 1350 MatSetValuesBlocked_SeqBAIJ, 1351 MatGetSubMatrix_SeqBAIJ, 1352 0, 1353 0, 1354 MatGetMaps_Petsc}; 1355 1356 EXTERN_C_BEGIN 1357 #undef __FUNC__ 1358 #define __FUNC__ "MatStoreValues_SeqBAIJ" 1359 int MatStoreValues_SeqBAIJ(Mat mat) 1360 { 1361 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1362 int nz = aij->i[aij->m]*aij->bs*aij->bs2; 1363 int ierr; 1364 1365 PetscFunctionBegin; 1366 if (aij->nonew != 1) { 1367 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1368 } 1369 1370 /* allocate space for values if not already there */ 1371 if (!aij->saved_values) { 1372 aij->saved_values = (Scalar *) PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values); 1373 } 1374 1375 /* copy values over */ 1376 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1377 PetscFunctionReturn(0); 1378 } 1379 EXTERN_C_END 1380 1381 EXTERN_C_BEGIN 1382 #undef __FUNC__ 1383 #define __FUNC__ "MatRetrieveValues_SeqBAIJ" 1384 int MatRetrieveValues_SeqBAIJ(Mat mat) 1385 { 1386 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1387 int nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr; 1388 1389 PetscFunctionBegin; 1390 if (aij->nonew != 1) { 1391 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1392 } 1393 if (!aij->saved_values) { 1394 SETERRQ(1,1,"Must call MatStoreValues(A);first"); 1395 } 1396 1397 /* copy values over */ 1398 ierr = PetscMemcpy(aij->a, aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1399 PetscFunctionReturn(0); 1400 } 1401 EXTERN_C_END 1402 1403 #undef __FUNC__ 1404 #define __FUNC__ "MatCreateSeqBAIJ" 1405 /*@C 1406 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1407 compressed row) format. For good matrix assembly performance the 1408 user should preallocate the matrix storage by setting the parameter nz 1409 (or the array nnz). By setting these parameters accurately, performance 1410 during matrix assembly can be increased by more than a factor of 50. 1411 1412 Collective on MPI_Comm 1413 1414 Input Parameters: 1415 + comm - MPI communicator, set to PETSC_COMM_SELF 1416 . bs - size of block 1417 . m - number of rows 1418 . n - number of columns 1419 . nz - number of block nonzeros per block row (same for all rows) 1420 - nnz - array containing the number of block nonzeros in the various block rows 1421 (possibly different for each block row) or PETSC_NULL 1422 1423 Output Parameter: 1424 . A - the matrix 1425 1426 Options Database Keys: 1427 . -mat_no_unroll - uses code that does not unroll the loops in the 1428 block calculations (much slower) 1429 . -mat_block_size - size of the blocks to use 1430 1431 Level: intermediate 1432 1433 Notes: 1434 The block AIJ format is fully compatible with standard Fortran 77 1435 storage. That is, the stored row and column indices can begin at 1436 either one (as in Fortran) or zero. See the users' manual for details. 1437 1438 Specify the preallocated storage with either nz or nnz (not both). 1439 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1440 allocation. For additional details, see the users manual chapter on 1441 matrices. 1442 1443 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1444 @*/ 1445 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1446 { 1447 Mat B; 1448 Mat_SeqBAIJ *b; 1449 int i,len,ierr,flg,mbs,nbs,bs2,size; 1450 1451 PetscFunctionBegin; 1452 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1453 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1"); 1454 1455 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1456 mbs = m/bs; 1457 nbs = n/bs; 1458 bs2 = bs*bs; 1459 1460 if (mbs*bs!=m || nbs*bs!=n) { 1461 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize"); 1462 } 1463 1464 if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz); 1465 if (nnz) { 1466 for (i=0; i<mbs; i++) { 1467 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1468 } 1469 } 1470 1471 *A = 0; 1472 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView); 1473 PLogObjectCreate(B); 1474 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b); 1475 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1476 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1477 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1478 if (!flg) { 1479 switch (bs) { 1480 case 1: 1481 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1482 B->ops->solve = MatSolve_SeqBAIJ_1; 1483 B->ops->mult = MatMult_SeqBAIJ_1; 1484 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1485 break; 1486 case 2: 1487 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1488 B->ops->solve = MatSolve_SeqBAIJ_2; 1489 B->ops->mult = MatMult_SeqBAIJ_2; 1490 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1491 break; 1492 case 3: 1493 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1494 B->ops->solve = MatSolve_SeqBAIJ_3; 1495 B->ops->mult = MatMult_SeqBAIJ_3; 1496 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1497 break; 1498 case 4: 1499 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1500 B->ops->solve = MatSolve_SeqBAIJ_4; 1501 B->ops->mult = MatMult_SeqBAIJ_4; 1502 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1503 break; 1504 case 5: 1505 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1506 B->ops->solve = MatSolve_SeqBAIJ_5; 1507 B->ops->mult = MatMult_SeqBAIJ_5; 1508 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1509 break; 1510 case 6: 1511 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1512 B->ops->solve = MatSolve_SeqBAIJ_6; 1513 B->ops->mult = MatMult_SeqBAIJ_6; 1514 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1515 break; 1516 case 7: 1517 B->ops->mult = MatMult_SeqBAIJ_7; 1518 B->ops->solve = MatSolve_SeqBAIJ_7; 1519 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1520 break; 1521 } 1522 } 1523 B->ops->destroy = MatDestroy_SeqBAIJ; 1524 B->ops->view = MatView_SeqBAIJ; 1525 B->factor = 0; 1526 B->lupivotthreshold = 1.0; 1527 B->mapping = 0; 1528 b->row = 0; 1529 b->col = 0; 1530 b->icol = 0; 1531 b->reallocs = 0; 1532 b->saved_values = 0; 1533 1534 b->m = m; B->m = m; B->M = m; 1535 b->n = n; B->n = n; B->N = n; 1536 1537 ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr); 1538 ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr); 1539 1540 b->mbs = mbs; 1541 b->nbs = nbs; 1542 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(b->imax); 1543 if (nnz == PETSC_NULL) { 1544 if (nz == PETSC_DEFAULT) nz = 5; 1545 else if (nz <= 0) nz = 1; 1546 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1547 nz = nz*mbs; 1548 } else { 1549 nz = 0; 1550 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1551 } 1552 1553 /* allocate the matrix space */ 1554 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int); 1555 b->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a); 1556 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1557 b->j = (int *) (b->a + nz*bs2); 1558 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1559 b->i = b->j + nz; 1560 b->singlemalloc = 1; 1561 1562 b->i[0] = 0; 1563 for (i=1; i<mbs+1; i++) { 1564 b->i[i] = b->i[i-1] + b->imax[i-1]; 1565 } 1566 1567 /* b->ilen will count nonzeros in each block row so far. */ 1568 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1569 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1570 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1571 1572 b->bs = bs; 1573 b->bs2 = bs2; 1574 b->mbs = mbs; 1575 b->nz = 0; 1576 b->maxnz = nz*bs2; 1577 b->sorted = 0; 1578 b->roworiented = 1; 1579 b->nonew = 0; 1580 b->diag = 0; 1581 b->solve_work = 0; 1582 b->mult_work = 0; 1583 b->spptr = 0; 1584 B->info.nz_unneeded = (double)b->maxnz; 1585 1586 *A = B; 1587 ierr = OptionsHasName(PETSC_NULL,"-help", &flg);CHKERRQ(ierr); 1588 if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); } 1589 1590 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 1591 "MatStoreValues_SeqBAIJ", 1592 (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1593 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 1594 "MatRetrieveValues_SeqBAIJ", 1595 (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1596 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1597 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1598 (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1599 PetscFunctionReturn(0); 1600 } 1601 1602 #undef __FUNC__ 1603 #define __FUNC__ "MatDuplicate_SeqBAIJ" 1604 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1605 { 1606 Mat C; 1607 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1608 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1609 1610 PetscFunctionBegin; 1611 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix"); 1612 1613 *B = 0; 1614 PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView); 1615 PLogObjectCreate(C); 1616 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c); 1617 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1618 C->ops->destroy = MatDestroy_SeqBAIJ; 1619 C->ops->view = MatView_SeqBAIJ; 1620 C->factor = A->factor; 1621 c->row = 0; 1622 c->col = 0; 1623 c->icol = 0; 1624 c->saved_values = 0; 1625 C->assembled = PETSC_TRUE; 1626 1627 c->m = C->m = a->m; 1628 c->n = C->n = a->n; 1629 C->M = a->m; 1630 C->N = a->n; 1631 1632 c->bs = a->bs; 1633 c->bs2 = a->bs2; 1634 c->mbs = a->mbs; 1635 c->nbs = a->nbs; 1636 1637 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax); 1638 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen); 1639 for ( i=0; i<mbs; i++ ) { 1640 c->imax[i] = a->imax[i]; 1641 c->ilen[i] = a->ilen[i]; 1642 } 1643 1644 /* allocate the matrix space */ 1645 c->singlemalloc = 1; 1646 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1647 c->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(c->a); 1648 c->j = (int *) (c->a + nz*bs2); 1649 c->i = c->j + nz; 1650 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1651 if (mbs > 0) { 1652 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1653 if (cpvalues == MAT_COPY_VALUES) { 1654 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1655 } else { 1656 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1657 } 1658 } 1659 1660 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1661 c->sorted = a->sorted; 1662 c->roworiented = a->roworiented; 1663 c->nonew = a->nonew; 1664 1665 if (a->diag) { 1666 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(c->diag); 1667 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1668 for ( i=0; i<mbs; i++ ) { 1669 c->diag[i] = a->diag[i]; 1670 } 1671 } else c->diag = 0; 1672 c->nz = a->nz; 1673 c->maxnz = a->maxnz; 1674 c->solve_work = 0; 1675 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1676 c->mult_work = 0; 1677 *B = C; 1678 ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNC__ 1683 #define __FUNC__ "MatLoad_SeqBAIJ" 1684 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1685 { 1686 Mat_SeqBAIJ *a; 1687 Mat B; 1688 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1689 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1690 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1691 int *masked, nmask,tmp,bs2,ishift; 1692 Scalar *aa; 1693 MPI_Comm comm = ((PetscObject) viewer)->comm; 1694 1695 PetscFunctionBegin; 1696 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1697 bs2 = bs*bs; 1698 1699 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1700 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor"); 1701 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1702 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1703 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object"); 1704 M = header[1]; N = header[2]; nz = header[3]; 1705 1706 if (header[3] < 0) { 1707 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ"); 1708 } 1709 1710 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 1711 1712 /* 1713 This code adds extra rows to make sure the number of rows is 1714 divisible by the blocksize 1715 */ 1716 mbs = M/bs; 1717 extra_rows = bs - M + bs*(mbs); 1718 if (extra_rows == bs) extra_rows = 0; 1719 else mbs++; 1720 if (extra_rows) { 1721 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1722 } 1723 1724 /* read in row lengths */ 1725 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1726 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1727 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1728 1729 /* read in column indices */ 1730 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) );CHKPTRQ(jj); 1731 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1732 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1733 1734 /* loop over row lengths determining block row lengths */ 1735 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1736 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1737 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) );CHKPTRQ(mask); 1738 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1739 masked = mask + mbs; 1740 rowcount = 0; nzcount = 0; 1741 for ( i=0; i<mbs; i++ ) { 1742 nmask = 0; 1743 for ( j=0; j<bs; j++ ) { 1744 kmax = rowlengths[rowcount]; 1745 for ( k=0; k<kmax; k++ ) { 1746 tmp = jj[nzcount++]/bs; 1747 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1748 } 1749 rowcount++; 1750 } 1751 browlengths[i] += nmask; 1752 /* zero out the mask elements we set */ 1753 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1754 } 1755 1756 /* create our matrix */ 1757 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1758 B = *A; 1759 a = (Mat_SeqBAIJ *) B->data; 1760 1761 /* set matrix "i" values */ 1762 a->i[0] = 0; 1763 for ( i=1; i<= mbs; i++ ) { 1764 a->i[i] = a->i[i-1] + browlengths[i-1]; 1765 a->ilen[i-1] = browlengths[i-1]; 1766 } 1767 a->nz = 0; 1768 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1769 1770 /* read in nonzero values */ 1771 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1772 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1773 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1774 1775 /* set "a" and "j" values into matrix */ 1776 nzcount = 0; jcount = 0; 1777 for ( i=0; i<mbs; i++ ) { 1778 nzcountb = nzcount; 1779 nmask = 0; 1780 for ( j=0; j<bs; j++ ) { 1781 kmax = rowlengths[i*bs+j]; 1782 for ( k=0; k<kmax; k++ ) { 1783 tmp = jj[nzcount++]/bs; 1784 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1785 } 1786 rowcount++; 1787 } 1788 /* sort the masked values */ 1789 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1790 1791 /* set "j" values into matrix */ 1792 maskcount = 1; 1793 for ( j=0; j<nmask; j++ ) { 1794 a->j[jcount++] = masked[j]; 1795 mask[masked[j]] = maskcount++; 1796 } 1797 /* set "a" values into matrix */ 1798 ishift = bs2*a->i[i]; 1799 for ( j=0; j<bs; j++ ) { 1800 kmax = rowlengths[i*bs+j]; 1801 for ( k=0; k<kmax; k++ ) { 1802 tmp = jj[nzcountb]/bs ; 1803 block = mask[tmp] - 1; 1804 point = jj[nzcountb] - bs*tmp; 1805 idx = ishift + bs2*block + j + bs*point; 1806 a->a[idx] = aa[nzcountb++]; 1807 } 1808 } 1809 /* zero out the mask elements we set */ 1810 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1811 } 1812 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix"); 1813 1814 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1815 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1816 ierr = PetscFree(aa);CHKERRQ(ierr); 1817 ierr = PetscFree(jj);CHKERRQ(ierr); 1818 ierr = PetscFree(mask);CHKERRQ(ierr); 1819 1820 B->assembled = PETSC_TRUE; 1821 1822 ierr = MatView_Private(B);CHKERRQ(ierr); 1823 PetscFunctionReturn(0); 1824 } 1825 1826 1827 1828