diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index 5ca700e..0000000
Binary files a/.DS_Store and /dev/null differ
diff --git a/README.md b/README.md
index 4be97fb..bb6d55d 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
 
 Broccoli, a user-friendly pipeline designed to infer with high precision orthologous groups and pairs of proteins using a phylogeny-based approach. Briefly, Broccoli performs ultra-fast phylogenetic analyses on most proteins and builds a network of orthologous relationships. Orthologous groups are then identified from the network using a parameter-free machine learning algorithm (label propagation). Broccoli is also able to detect chimeric proteins resulting from gene-fusion events and to assign these proteins to the corresponding orthologous groups. 
 
-__Reference:__ <a href="">insert reference</a>
+__Reference:__ <a href="https://doi.org/10.1101/2019.12.13.875831">Broccoli: combining phylogenetic and network analyses for orthology assignment</a>
 
 <p align="center">
   <img width="650" height="auto" src="./images/overview_broccoli.png">
diff --git a/broccoli.py b/broccoli.py
index 5143ded..ba28e6a 100644
--- a/broccoli.py
+++ b/broccoli.py
@@ -33,7 +33,7 @@
 
 def parse_args():
     # define and parse command-line arguments
-    parser = argparse.ArgumentParser(description='            Broccoli v1.01', add_help=False, formatter_class=argparse.RawTextHelpFormatter, epilog=' \n')
+    parser = argparse.ArgumentParser(description='            Broccoli v1.1', add_help=False, formatter_class=argparse.RawTextHelpFormatter, epilog=' \n')
     
     common = parser.add_argument_group(' general options')
     common.add_argument('-steps',         help='steps to be performed, comma separated (default = \'1,2,3,4\')', metavar='', type=str, default='1,2,3,4')    
diff --git a/Manual_broccoli_v1.0.pdf b/manual_Broccoli_v1.1.pdf
similarity index 72%
rename from Manual_broccoli_v1.0.pdf
rename to manual_Broccoli_v1.1.pdf
index 2d9905d..9ea39c7 100644
Binary files a/Manual_broccoli_v1.0.pdf and b/manual_Broccoli_v1.1.pdf differ
diff --git a/scripts/broccoli_step2.py b/scripts/broccoli_step2.py
index 18e71d3..48607a3 100644
--- a/scripts/broccoli_step2.py
+++ b/scripts/broccoli_step2.py
@@ -24,6 +24,7 @@
 import subprocess
 import gzip
 import math
+import re
 from multiprocessing import Pool as ThreadPool 
 from scripts import utils
 try:
@@ -166,14 +167,27 @@ def analyse_species(dict_sp):
     return present, dupli
 
 
-def correct_HSP(qu_seq, ta_seq):
-    # remove insertions from target HSP
-    for i, k in enumerate(qu_seq):
-        if k == '-':
-            #ta_seq[i] = 'z'
-            ta_seq = ta_seq[:i] + 'z' + ta_seq[(i+1):]
-    no_insertion = ta_seq.replace('z','')
-    return no_insertion
+''' new function that creates aligned HSP sequences from a cigar string '''
+def extract_HSP(full_seq, start, cig):  # returns the HSP string rebuilt from full_seq, walking from index start as directed by cig
+    # split the cigar string into (operation letter, run length) tuples, e.g. '3M2D' -> [('M', 3), ('D', 2)]
+    matches = re.findall(r'(\d+)([A-Z]{1})', cig)
+    l_tup = [(m[1], int(m[0])) for m in matches]
+
+    # reconstruct HSP by walking full_seq from start, one cigar operation at a time
+    hsp = ''
+    position = start
+    for t in l_tup:
+        # 'M' (aa matches): copy the next t[1] residues of full_seq and move past them
+        if t[0] == 'M':
+            hsp += full_seq[position:(position + t[1])]
+            position += t[1]
+        # 'D' (deletion): skip t[1] residues of full_seq without adding anything to the HSP
+        elif t[0] == 'D':
+            position += t[1]
+        # 'I' (insertion): add t[1] gap characters; the position in full_seq is left unchanged
+        elif t[0] == 'I':
+            hsp += '-' * t[1]
+    return hsp  # operation letters other than M, D and I are ignored by the loop above
 
 
 def process_location(qu_start, qu_end, min_start, max_end):
@@ -214,7 +228,7 @@ def process_file(file):
     for file_db in list_files:
         search_output = index + '_' + file_db.replace('.fas','.gz')
         subprocess.check_output(path_diamond + ' blastp --quiet --threads 1 --db ./dir_step2/' + file_db.replace('.fas','.db') + ' --max-target-seqs ' + str(max_per_species) + ' --query ./dir_step1/' + file + ' \
-                 --compress 1 --more-sensitive -e ' + str(evalue) + ' -o ./dir_step2/' + index + '/' + search_output + ' --outfmt 6 qseqid sseqid qstart qend qseq_gapped sseq_gapped 2>&1', shell=True)
+                 --compress 1 --more-sensitive -e ' + str(evalue) + ' -o ./dir_step2/' + index + '/' + search_output + ' --outfmt 6 qseqid sseqid qstart qend sstart cigar 2>&1', shell=True)
     
     ## get all hits in a dict of list
     all_output = collections.defaultdict(list)
@@ -247,13 +261,13 @@ def process_file(file):
             species = name_2_sp_phylip_seq[target][0]
             qu_start = int(ll[1]) -1
             qu_end   = int(ll[2]) -1
-            qu_seq = ll[3]
-            ta_seq = ll[4]
+            ta_start = int(ll[3]) -1
+            cigar    = ll[4]
             
             if target in all_hits:
                 # extract HSP and add to target seq
-                corrected_HSP = correct_HSP(qu_seq, ta_seq)
-                all_hits[target] = all_hits[target][:qu_start] + corrected_HSP + all_hits[target][qu_end + 1:]
+                HSP = extract_HSP(name_2_sp_phylip_seq[target][2], ta_start, cigar)
+                all_hits[target] = all_hits[target][:qu_start] + HSP + all_hits[target][qu_end + 1:]
                 min_start, max_end = process_location(qu_start, qu_end, min_start, max_end) 
                 
             else:
@@ -261,8 +275,8 @@ def process_file(file):
                 # create target seq
                 all_hits[target] = '-' * len(name_2_sp_phylip_seq[prot][2])
                 # extract HSP
-                corrected_HSP = correct_HSP(qu_seq, ta_seq)
-                all_hits[target] = all_hits[target][:qu_start] + corrected_HSP + all_hits[target][qu_end + 1:]
+                HSP = extract_HSP(name_2_sp_phylip_seq[target][2], ta_start, cigar)
+                all_hits[target] = all_hits[target][:qu_start] + HSP + all_hits[target][qu_end + 1:]
                 min_start, max_end = process_location(qu_start, qu_end, min_start, max_end)              
             
             # reduce output for pickle (convert all element to integers)