Get the first X molecules from a large .Mol2 file
Written by D. Lagorce
Put the following lines in a file named, for instance, count.py
--------------------------------
#! /usr/bin/env python
from string import *
import sys
import os.path
def mol_counter(fhi, n_mol):
    """Return the text of the first ``n_mol`` molecules read from ``fhi``.

    Lines are read one at a time with ``fhi.readline()``. Every line whose
    content (ignoring trailing whitespace and the line terminator) is exactly
    ``@<TRIPOS>MOLECULE`` starts a new molecule record. Reading stops as soon
    as the header of molecule number ``n_mol + 1`` is met; that header is
    consumed from ``fhi`` but is not part of the returned text.

    Parameters:
        fhi: text-mode file-like object providing ``readline()``.
        n_mol: number of molecule records to keep.

    Returns:
        A single string with all kept lines, unmodified (original line
        endings are preserved). Lines located before the first molecule
        header are kept as well. If the input holds fewer than ``n_mol``
        molecules, the whole content is returned. A negative ``n_mol``
        yields an empty string.
    """
    marker = "@<TRIPOS>MOLECULE"
    kept = []
    seen = 0
    # iter(callable, sentinel) calls readline() until it returns "" (EOF),
    # which keeps the exact read-one-line-at-a-time semantics on ``fhi``.
    for line in iter(fhi.readline, ""):
        # rstrip() instead of line[:-1]: also matches headers ending in
        # "\r\n", trailing blanks, or a last line without any newline.
        if line.rstrip() == marker:
            seen += 1
        # Checked on every line (not only on headers) so that a negative
        # n_mol stops immediately instead of copying the whole input.
        if seen > n_mol:
            break
        kept.append(line)
    # Join once at the end rather than growing a string with += per line.
    return "".join(kept)
if __name__ == '__main__':
    # Command line: <input .mol2 file> <number of molecules to keep>.
    # Extra arguments are ignored, as before.
    if len(sys.argv) < 3:
        sys.exit("usage: %s <file.mol2> <n_molecules>" % sys.argv[0])
    file_in = sys.argv[1]
    try:
        n = int(sys.argv[2])
    except ValueError:
        sys.exit("error: number of molecules must be an integer, got %r"
                 % sys.argv[2])

    # The output goes to the current working directory and is named after
    # the input file: best_<n>_<input basename>.
    file_path = str(os.path.basename(file_in))
    file_out = './best_' + str(n) + '_' + file_path

    # Read first, then write: the output file is only created once the input
    # was read successfully, and both handles are closed even on error.
    with open(file_in, 'r') as fh_in:
        res = mol_counter(fh_in, n)
    with open(file_out, 'w') as fh_out:
        fh_out.write(res)
-------------------------------------
make the script executable (chmod +x count.py) and run it like this in a terminal window:
./count.py mybank.mol2 3000
this means that you want to get the first 3000 molecules from your large compound collection
the output file written in your working directory will be: best_3000_mybank.mol2
note that each molecule record must start with a line containing exactly @<TRIPOS>MOLECULE, since this is how the script counts molecules