From 716caa21841d31227bfac983bf654e0c987c7302 Mon Sep 17 00:00:00 2001 From: Chris Ringrose <119854158+cringroseccdc@users.noreply.github.com> Date: Tue, 3 Feb 2026 18:16:44 +0000 Subject: [PATCH] Adding two scripts from ccdc confidential for conformer analysis. Original scripts located here: https://github.com/ccdc-confidential/cpp-apps-main/tree/main/wrapping/contributed The scripts have been heavily edited for usability, clarity and maintainability, but the science is the same. --- scripts/conformer_filter_density/ReadMe.md | 5 + .../conformer_filter_density.py | 189 ++++++++++++++++++ scripts/filter_poses/ReadMe.md | 5 + scripts/filter_poses/filter_poses.py | 189 ++++++++++++++++++ 4 files changed, 388 insertions(+) create mode 100644 scripts/conformer_filter_density/ReadMe.md create mode 100644 scripts/conformer_filter_density/conformer_filter_density.py create mode 100644 scripts/filter_poses/ReadMe.md create mode 100644 scripts/filter_poses/filter_poses.py diff --git a/scripts/conformer_filter_density/ReadMe.md b/scripts/conformer_filter_density/ReadMe.md new file mode 100644 index 0000000..020d3ee --- /dev/null +++ b/scripts/conformer_filter_density/ReadMe.md @@ -0,0 +1,5 @@ +# Conformer Filter Density + +Filter conformers using a variety of metrics. See arguments in script for details. + +CCDC Python API Licence required, minimum version: 3.0.15 diff --git a/scripts/conformer_filter_density/conformer_filter_density.py b/scripts/conformer_filter_density/conformer_filter_density.py new file mode 100644 index 0000000..8b684b8 --- /dev/null +++ b/scripts/conformer_filter_density/conformer_filter_density.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 +# +# This script can be used for any purpose without limitation subject to the +# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx +# +# This permission notice and the following statement of attribution must be +# included in all copies or substantial portions of this script. +# +# 2026-02-03: created by the Cambridge Crystallographic Data Centre + +import argparse +import csv + +from ccdc import conformer, io + + +def parse_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser(description=__doc__) + + parser.add_argument('inmolfn', + metavar='', + help='Input file (single- or multi-molecule file)') + + parser.add_argument('-m', '--mode', + choices=['absolute', 'relative'], + default='absolute', + help='Limit mode: absolute (fixed threshold) or relative ' + '(threshold based on molecule with fewest unusual torsions). ' + 'WARNING: Relative mode may behave unexpectedly with conformers ' + 'from multiple input molecules (default: %(default)s)') + + parser.add_argument('-l', '--limit', + dest='torsion_limit', + type=int, + default=0, + metavar='', + help='Maximum number of unusual torsions for a passing molecule ' + '(default: %(default)s)') + + parser.add_argument('-d', '--local-density', + dest='local_density_threshold', + type=float, + default=10.0, + metavar='', + help='Local density threshold for classifying a torsion as unusual ' + '(default: %(default)s)') + + parser.add_argument('--incl-organometallics', + dest='incl_organometallics', + action='store_true', + help='Include organometallic compounds in the search ' + '(default: organic compounds only)') + + parser.add_argument('--generalisation', + action='store_true', + help='Turn on generalisation for searches') + + parser.add_argument('--successfn', + default='successes.mol', + metavar='', + help='Output file for molecules that pass the filter ' + '(default: %(default)s)') + + parser.add_argument('--failurefn', + default='failures.mol', + metavar='', + help='Output file for molecules that fail the filter ' + '(default: %(default)s)') + + parser.add_argument('-u', '--unusual-torsions', + dest='unusualtorsionfn', + default='unusual_torsions.csv', + metavar='', + help='Output CSV file for unusual torsion details ' + '(default: %(default)s)') + + return parser.parse_args() + + +def create_mogul_engine(local_density_threshold, incl_organometallics, generalisation): + """Create and configure a geometry analyser engine. + + Args: + local_density_threshold: Threshold for classifying torsions as unusual + incl_organometallics: Whether to include organometallic compounds + generalisation: Whether to enable generalisation for searches + + Returns: + Configured ccdc.conformer.GeometryAnalyser instance + """ + engine = conformer.GeometryAnalyser() + + engine.settings.bond.analyse = False + engine.settings.angle.analyse = False + engine.settings.ring.analyse = False + + engine.settings.torsion.local_density_threshold = local_density_threshold + engine.settings.generalisation = generalisation + engine.settings.organometallic_filter = 'all' if incl_organometallics else 'organics_only' + + return engine + + +def analysis(torsion_limit, input_filename, mode, engine, success_file, failure_file, unusual_torsion_file): + """Analyze molecules for unusual torsions and filter based on criteria. + + Args: + torsion_limit: Maximum number of unusual torsions allowed + input_filename: Path to input molecule file + mode: 'absolute' or 'relative' filtering mode + engine: Configured GeometryAnalyser instance + success_file: Path to output file for passing molecules + failure_file: Path to output file for failing molecules + unusual_torsion_file: Path to CSV file for unusual torsion details + """ + # Analyze all molecules and collect unusual torsion data + molecules = [] + min_unusual_torsions = float('inf') + + with io.MoleculeReader(input_filename) as mol_reader: + for molecule in mol_reader: + molecule.standardise_aromatic_bonds() + molecule.standardise_delocalised_bonds() + + geometry_analysed_molecule = engine.analyse_molecule(molecule) + + molecule.unusual_torsions = [ + t for t in geometry_analysed_molecule.analysed_torsions + if t.unusual and t.enough_hits + ] + molecule.num_unusual_torsions = len(molecule.unusual_torsions) + molecules.append(molecule) + + min_unusual_torsions = min(min_unusual_torsions, molecule.num_unusual_torsions) + + # Write results + with io.MoleculeWriter(success_file) as passed_writer, \ + io.MoleculeWriter(failure_file) as failed_writer, \ + open(unusual_torsion_file, 'w', newline='') as csv_file: + + csv_writer = csv.writer(csv_file) + csv_writer.writerow(['MoleculeIndex', 'Value', 'Zscore', 'LocalDensity', 'NumHits', 'Atoms']) + + for idx, molecule in enumerate(molecules): + threshold = torsion_limit if mode == 'absolute' else min_unusual_torsions + torsion_limit + failed = molecule.num_unusual_torsions > threshold + + if failed: + failed_writer.write(molecule) + for torsion in molecule.unusual_torsions: + csv_writer.writerow([ + idx, + torsion.value, + torsion.z_score, + torsion.local_density, + torsion.nhits, + ' '.join(torsion.atom_labels) + ]) + else: + passed_writer.write(molecule) + + +def run(): + """Main entry point for the script.""" + args = parse_args() + + if args.torsion_limit < 0: + raise ValueError('Torsion limit must be >= 0') + + engine = create_mogul_engine( + args.local_density_threshold, + args.incl_organometallics, + args.generalisation + ) + + analysis( + args.torsion_limit, + args.inmolfn, + args.mode, + engine, + args.successfn, + args.failurefn, + args.unusualtorsionsfn, + ) + + +if __name__ == '__main__': + run() diff --git a/scripts/filter_poses/ReadMe.md b/scripts/filter_poses/ReadMe.md new file mode 100644 index 0000000..e909ab5 --- /dev/null +++ b/scripts/filter_poses/ReadMe.md @@ -0,0 +1,5 @@ +# Filter Poses + +This is a short script to filter molecular poses in a multi-molecule file based on the torsion probabilities. + +CCDC Python API Licence required, minimum version: 3.0.15 diff --git a/scripts/filter_poses/filter_poses.py b/scripts/filter_poses/filter_poses.py new file mode 100644 index 0000000..3827e16 --- /dev/null +++ b/scripts/filter_poses/filter_poses.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 +# +# This script can be used for any purpose without limitation subject to the +# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx +# +# This permission notice and the following statement of attribution must be +# included in all copies or substantial portions of this script. +# +# 2025-02-03: created by the Cambridge Crystallographic Data Centre + +import argparse +import copy +import csv +import math + +from ccdc import conformer, io + + +def parse_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser(description=__doc__) + + parser.add_argument('conformer_file', + metavar='', + help='Input file (multi-molecule file)') + + parser.add_argument('-csv', '--write-csv', + dest='write_csv', + action='store_true', + help='Write a csv file for all the analysed conformers.') + + return parser.parse_args() + + +class ProbabilityScorer: + """ + Use the ConformerGenerator and GeometryAnalyser to score conformers based on their conformational + probabilities and unusual torsions. + """ + def __init__(self, user_conformer_generator_settings=None, skip_minimisation=True, + user_mogul_analysis_settings=None): + + self._generator = self._create_conformer_generator(user_conformer_generator_settings, skip_minimisation) + self._mogul_analysis_engine = self._create_analyser(user_mogul_analysis_settings) + + def _create_analyser(self, user_mogul_analyser_settings): + """Create a GeometryAnalyser engine to analyse the conformers.""" + engine = conformer.GeometryAnalyser() + + # Tweak the 'default defaults' + # By default, in this use case, we do not want to use generalisation. + engine.settings.generalisation = False + # By default, only the organic subset, i.e. exclude organometallic + engine.settings.organometallic_filter = 'Organic' + + # Ensure user settings are kept: + if user_mogul_analyser_settings is not None: + engine.settings = copy.copy(user_mogul_analyser_settings) + + # Only analyse torsions: + engine.settings.bond.analyse = False + engine.settings.angle.analyse = False + engine.settings.ring.analyse = False + + return engine + + def _create_conformer_generator(self, user_generator_settings, skip_minimisation=True): + """Create a ConformerGenerator engine to generate conformers for the molecules.""" + settings = conformer.ConformerSettings() + if user_generator_settings is not None: + settings = copy.copy(user_generator_settings) + + # Mandatory setting here: this must work in use_input_torsion_distributions mode. + settings.use_input_torsion_distributions = True + + engine = conformer.ConformerGenerator(settings, skip_minimisation) + return engine + + @staticmethod + def _combined_local_density(all_local_densities): + if all_local_densities: + return sum(all_local_densities) / len(all_local_densities) + return 100.0 + + def probability_analysis(self, molecule, bad_normalised_score=1.0, bad_probability_score=0.0, + bad_rmsd_score=9999.9): + + # Approximation excluding rings: + is_rigid = sum(bond.is_rotatable for bond in molecule.bonds) == 0 + + conformers = self._generator.generate(molecule) + + normalised_score = None + rmsd = None + ln_probability = None + + if conformers: + # First conformer is the most likely, so return its score. + normalised_score = conformers[0].normalised_score + rmsd = conformers[0].rmsd() + ln_probability = conformers[0].probability + + if is_rigid: + if ln_probability is None: + ln_probability = 0.0 + if normalised_score is None: + normalised_score = 0.0 + + if normalised_score is None: + normalised_score = bad_normalised_score + + if rmsd is None: + rmsd = bad_rmsd_score + + probability = bad_probability_score + if ln_probability is not None: + probability = math.exp(ln_probability) + + return normalised_score, probability, rmsd + + def unusual_torsions_analysis(self, molecule, max_hist_size=15): + checked_mol = self._mogul_analysis_engine.analyse_molecule(molecule) + unusual_count = 0 + local_densities = [] + unusual_local_densities = [] + no_data_torsions = 0 + for tor in checked_mol.analysed_torsions: + hist_size = sum(tor.histogram()) + if hist_size > max_hist_size: + local_densities.append(tor.local_density) + + if tor.unusual: + unusual_local_densities.append(tor.local_density) + unusual_count += 1 + else: + no_data_torsions += 1 + + # Expected number of torsions within 10 degrees assuming even distribution of observations. + # Number of Mogul bins would be strictly better than assuming 18. + uniform_torsion_dist = 100 / 18 + local_densities.append(uniform_torsion_dist) + return unusual_count, self._combined_local_density(local_densities), self._combined_local_density( + unusual_local_densities), no_data_torsions + + def process_molecule(self, molecule): + molecule.remove_unknown_atoms() + molecule.assign_bond_types() + + unusual_count, avg_local_density, avg_unusual_local_density, no_data_torsions = self.unusual_torsions_analysis( + molecule) + normalised_score, probability, rmsd = self.probability_analysis(molecule) + + return { + "identifier": molecule.identifier, + "number of unusual torsions": unusual_count, + "average local density": avg_local_density, + "average unusual local density": avg_unusual_local_density, + "number of torsions with no data": no_data_torsions, + "normalised probability score": normalised_score, + "probability score": probability, + "RMSD to input conformation": rmsd, + } + + +def run(): + args = parse_args() + + p = ProbabilityScorer() + + all_data = [] + + with io.MoleculeReader(args.conformer_file) as reader: + for i, molecule in enumerate(reader): + data = p.process_molecule(molecule) + if i == 0: + print(", ".join(data.keys())) + print(", ".join(str(value) for value in data.values())) + all_data.append(data) + + if args.write_csv: + keys = all_data[0].keys() + with open('filtered_poses_analysis.csv', 'w', newline='') as output_file: + dict_writer = csv.DictWriter(output_file, keys) + dict_writer.writeheader() + dict_writer.writerows(all_data) + + +if __name__ == "__main__": + run()