diff --git a/introduction/index.html b/introduction/index.html index f8a63b8..3b1c5ed 100644 --- a/introduction/index.html +++ b/introduction/index.html @@ -62,7 +62,7 @@ "url" : "https://bioinf.shenwei.me/LexicMap/introduction/", "headline": "Introduction", "description": "LexicMap is a nucleotide sequence alignment tool for efficiently querying gene, plasmid, viral, or long-read sequences against up to millions of prokaryotic genomes.\nTable of contents Table of contents Features Introduction Quick start Performance Indexing Searching Installation Algorithm overview Related projects Support License Features LexicMap is scalable to up to millions of prokaryotic genomes. The sensitivity of LexicMap is comparable with Blastn. The alignment is fast and memory-efficient. LexicMap is easy to install, we provide binary files with no dependencies for Linux, Windows, MacOS (x86 and arm CPUs).", - "wordCount" : "1633", + "wordCount" : "1618", "inLanguage": "en", "isFamilyFriendly": "true", "mainEntityOfPage": { @@ -1679,54 +1679,50 @@

Introduction

Export in BLAST-style format:

seqkit seq -M 500 q.long-reads.fasta.gz \
-    | seqkit head -n 2 \
+    | seqkit head -n 1 \
     | lexicmap search -d demo.lmi/ -a \
-    | lexicmap utils 2blast
+    | lexicmap utils 2blast --kv-file-genome ass2species.map
 
-Query = GCF_000017205.1_r160
-Length = 478
+Query = GCF_006742205.1_r100
+Length = 431
 
-[Subject genome #1/1] = GCF_000017205.1
-Query coverage per genome = 95.188%
+[Subject genome #1/1] = GCF_006742205.1 Staphylococcus epidermidis
+Query coverage per genome = 92.575%
 
->NC_009656.1
-Length = 6588339
+>NZ_AP019721.1
+Length = 2422602
 
- HSP #1
- Query coverage per seq = 95.188%, Aligned length = 463, Identities = 95.680%, Gaps = 12
- Query range = 13-467, Subject range = 4866862-4867320, Strand = Plus/Plus
+HSP #1
+Query coverage per seq = 92.575%, Aligned length = 402, Identities = 98.507%, Gaps = 4
+Query range = 33-431, Subject range = 1321677-1322077, Strand = Plus/Minus
 
-Query  13       CCTCAAACGAGTCC-AACAGGCCAACGCCTAGCAATCCCTCCCCTGTGGGGCAGGGAAAA  71
-                |||||||||||||| |||||||| ||||||  | ||||||||||||| ||||||||||||
-Sbjct  4866862  CCTCAAACGAGTCCGAACAGGCCCACGCCTCACGATCCCTCCCCTGTCGGGCAGGGAAAA  4866921
+Query  33       TAAAACGATTGCTAATGAGTCACGTATTTCATCTGGTTCGGTAACTATACCGTCTACTAT  92
+                ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct  1322077  TAAAACGATTGCTAATGAGTCACGTATTTCATCTGGTTCGGTAACTATACCGTCTACTAT  1322018
 
-Query  72       TCGTCCTTTATGGTCCGTTCCGGGCACGCACCGGAACGGCGGTCATCTTCCACGGTGCCC  131
-                |||||||||||||||||||||||||||||||||||||||||||||| |||||||||||||
-Sbjct  4866922  TCGTCCTTTATGGTCCGTTCCGGGCACGCACCGGAACGGCGGTCAT-TTCCACGGTGCCC  4866980
+Query  93       GGACTCAGTGTAACCCTGTAATAAAGAGATTGGCGTACGTAATTCATGTG-TACATTTGC  151
+                |||||||||||||||||||||||||||||||||||||||||||||||||| |||||||||
+Sbjct  1322017  GGACTCAGTGTAACCCTGTAATAAAGAGATTGGCGTACGTAATTCATGTGATACATTTGC  1321958
 
-Query  132      GCCCACGGCGGACCCGCGGAAACCGACCCGGGCGCCAAGGCGCCCGGGAACGGAGTA-CA  190
-                ||| ||||||||||| ||||||||||||||||||||||||||||||||||||||||| ||
-Sbjct  4866981  GCC-ACGGCGGACCC-CGGAAACCGACCCGGGCGCCAAGGCGCCCGGGAACGGAGTATCA  4867038
+Query  152      TATAAAATCTTTTTTCATTTGATCAAGATTATGTTCATTTGTCATATCACAGGATGACCA  211
+                |||||||||||||||||||||||||||||||||||||||||||||||||| |||||||||
+Sbjct  1321957  TATAAAATCTTTTTTCATTTGATCAAGATTATGTTCATTTGTCATATCAC-GGATGACCA  1321899
 
-Query  191      CTCGGCGTTCGGCCAGCGACAGC---GACGCGTTGCCGCCCACCGCGGTGGTGTTCACCG  247
-                |||||||| ||||||||||||||   ||||||||||||||||||||||||||||||||||
-Sbjct  4867039  CTCGGCGT-CGGCCAGCGACAGCAGCGACGCGTTGCCGCCCACCGCGGTGGTGTTCACCG  4867097
+Query  212      TGACAATACCACTTCTACCATTTGTTTGAATTCTATCTATATAACTGGAGATAAATACAT  271
+                ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
+Sbjct  1321898  TGACAATACCACTTCTACCATTTGTTTGAATTCTATCTATATAACTGGAGATAAATACAT  1321839
 
-Query  248      AGGTGGTGCGCTCGCTGAC-AAACGCAGCAGGTAGTTCGGCCCGCCGGCCTTGGGACCG-  305
-                ||||||||||||||||||| |||||||||||||||||||||||||||||||||||||||
-Sbjct  4867098  AGGTGGTGCGCTCGCTGACGAAACGCAGCAGGTAGTTCGGCCCGCCGGCCTTGGGACCGG  4867157
+Query  272      AGTACCTTGTATTAATTTCTAATTCTAA-TACTCATTCTGTTGTGATTCAAATGGTGCTT  330
+                |||||||||||||||||||||||||||| ||||||||||||||||||||||||| |||||
+Sbjct  1321838  AGTACCTTGTATTAATTTCTAATTCTAAATACTCATTCTGTTGTGATTCAAATGTTGCTT  1321779
 
-Query  306      TGCCGGACAGCCCGTGGCCGCCGAACAGTTGCACGCCCACCACCGCGCCGAT-TGGTTTC  364
-                |||||||||||||||||||||||||| ||||||||||||||||||||||||| ||||| |
-Sbjct  4867158  TGCCGGACAGCCCGTGGCCGCCGAACGGTTGCACGCCCACCACCGCGCCGATCTGGTTGC  4867217
+Query  331      CAATTTGCTGTTCAATAGATTCTTTTGAAAAATCATCAATGTGACGCATAATATAATCAG  390
+                |||||||||||||||||||||||||||||||||||||||||||||||||||||| |||||
+Sbjct  1321778  CAATTTGCTGTTCAATAGATTCTTTTGAAAAATCATCAATGTGACGCATAATATCATCAG  1321719
 
-Query  365      GGTTGACGTAGAGGTTGCCGACCCGCGCCAGCTCTTGGATGCGGCGGGCGGTTTCCTCGT  424
-                |||||||||||||||||||||||||||||||||||| |||||||||||||||||||||||
-Sbjct  4867218  GGTTGACGTAGAGGTTGCCGACCCGCGCCAGCTCTTCGATGCGGCGGGCGGTTTCCTCGT  4867277
-
-Query  425      TGCGGCTGTGGACCCCCATGGTCAGGCCGAAACCGGTGGCGTT  467
-                |||||||||||||||||||||||||||||||||||||||||||
-Sbjct  4867278  TGCGGCTGTGGACCCCCATGGTCAGGCCGAAACCGGTGGCGTT  4867320
+Query  391      CCATCTTGTT-GACAATATGATTTCACGTTGATTATTAATGC  431
+                |||||||||| |||||||||||||||||||||||||||||||
+Sbjct  1321718  CCATCTTGTTTGACAATATGATTTCACGTTGATTATTAATGC  1321677
 

Learn more Step 2. Searching

Exporting blast-like alignment text.

From file:

lexicmap utils 2blast results.tsv -o results.txt
-

From stdin:

+

Add genome annotation

+
lexicmap utils 2blast results.tsv -o results.txt --kv-file-genome ass2species.map
+

From stdin:

# align only one long read (<= 500 bp)
 
 $ seqkit seq -M 500 q.long-reads.fasta.gz \
     | seqkit head -n 1 \
     | lexicmap search -d demo.lmi/ -a \
-    | lexicmap utils 2blast
+    | lexicmap utils 2blast --kv-file-genome ass2species.map
 
 Query = GCF_006742205.1_r100
 Length = 431
 
-[Subject genome #1/1] = GCF_006742205.1
+[Subject genome #1/1] = GCF_006742205.1 Staphylococcus epidermidis
 Query coverage per genome = 92.575%
 
 >NZ_AP019721.1