Skip to content

Genome IGR selection: Genome listed on Rfam cannot be accessed #31

@vohlhauser

Description

@vohlhauser

I wanted to use DIMPL to analyze the genome of Xylella fastidiosa for ncRNAs, but the deposited genome doesn't seem to work.
Here is a copy of the code in the first Jupyter notebook and the error I get:

upid = 'UP000000812' #Enter your uniprot ID
annotated_df, fig, layout, genome = display_genome(upid)
display(fig)

error Traceback (most recent call last)
/tmp/ipykernel_33/91208399.py in
1 upid = 'UP000000812' #Enter your uniprot ID
----> 2 annotated_df, fig, layout, genome = display_genome(upid)
3 display(fig)

~/work/src/visualization/visualize.py in display_genome(upid)
139 session.close()
140 download_genome(genome)
--> 141 igr_df = extract_igrs(genome, igr_length_cutoff=1)
142 annotated_df = annotate_igrs(genome, igr_df)
143 scatter_plots = graph_genome(annotated_df)

~/work/src/data/make_dataset.py in extract_igrs(genome, igr_length_cutoff)
134
135 # Create a Seqrecord for each "chromosome" in the file
--> 136 for seqrecord in SeqIO.parse(genbank_file, "gb"):
137
138 cds_list = []

/opt/conda/lib/python3.7/site-packages/Bio/SeqIO/Interfaces.py in next(self)
72 def next(self):
73 try:
---> 74 return next(self.records)
75 except Exception:
76 if self.should_close_stream:

/opt/conda/lib/python3.7/site-packages/Bio/GenBank/Scanner.py in parse_records(self, handle, do_features)
514 with as_handle(handle) as handle:
515 while True:
--> 516 record = self.parse(handle, do_features)
517 if record is None:
518 break

/opt/conda/lib/python3.7/site-packages/Bio/GenBank/Scanner.py in parse(self, handle, do_features)
497 )
498
--> 499 if self.feed(handle, consumer, do_features):
500 return consumer.data
501 else:

/opt/conda/lib/python3.7/site-packages/Bio/GenBank/Scanner.py in feed(self, handle, consumer, do_features)
473
474 # Footer and sequence
--> 475 misc_lines, sequence_string = self.parse_footer()
476 self._feed_misc_lines(consumer, misc_lines)
477

/opt/conda/lib/python3.7/site-packages/Bio/GenBank/Scanner.py in parse_footer(self)
1240 raise ValueError("Sequence line mal-formed, '%s'" % line)
1241 seq_lines.append(line[10:]) # remove spaces later
-> 1242 line = self.handle.readline()
1243
1244 self.line = line

/opt/conda/lib/python3.7/gzip.py in read1(self, size)
298 if size < 0:
299 size = io.DEFAULT_BUFFER_SIZE
--> 300 return self._buffer.read1(size)
301
302 def peek(self, n):

/opt/conda/lib/python3.7/_compression.py in readinto(self, b)
66 def readinto(self, b):
67 with memoryview(b) as view, view.cast("B") as byte_view:
---> 68 data = self.read(len(byte_view))
69 byte_view[:len(data)] = data
70 return len(data)

/opt/conda/lib/python3.7/gzip.py in read(self, size)
480 buf = self._fp.read(io.DEFAULT_BUFFER_SIZE)
481
--> 482 uncompress = self._decompressor.decompress(buf, size)
483 if self._decompressor.unconsumed_tail != b"":
484 self._fp.prepend(self._decompressor.unconsumed_tail)

error: Error -3 while decompressing data: invalid stored block lengths

Metadata

Metadata

Assignees

Labels

bug: Something isn't working

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions