NO_JIRA Adding two scripts from ccdc confidential for conformer analysis. Ori… #91

Alex-AMC · 2026-02-04T10:36:28Z

Can you please expan on this Readme to include the sections that is in the template https://github.com/ccdc-opensource/csd-python-api-scripts/tree/main/scripts/new_script_readme_template

Alex-AMC · 2026-02-04T10:39:26Z

Same as previous comment please :)

And can you add the scripts to the ReadMe in the script folder too please :)

-Original file line number
+Diff line change
@@ -0,0 +1,5 @@
+    # Conformer Filter Density
+    Filter conformers using a variety of metrics. See arguments in script for details.
+    CCDC Python API Licence required, minimum version: 3.0.15

-Original file line number
+Diff line change
@@ -0,0 +1,189 @@
+    #!/usr/bin/env python3
+    #
+    # This script can be used for any purpose without limitation subject to the
+    # conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
+    #
+    # This permission notice and the following statement of attribution must be
+    # included in all copies or substantial portions of this script.
+    #
+    # 2026-02-03: created by the Cambridge Crystallographic Data Centre
+    import argparse
+    import csv
+    from ccdc import conformer, io
+    def parse_args():
+        """Parse command line arguments."""
+        parser = argparse.ArgumentParser(description=__doc__)
+        parser.add_argument('inmolfn',
+                            metavar='<input molecule file>',
+                            help='Input file (single- or multi-molecule file)')
+        parser.add_argument('-m', '--mode',
+                            choices=['absolute', 'relative'],
+                            default='absolute',
+                            help='Limit mode: absolute (fixed threshold) or relative '
+                                 '(threshold based on molecule with fewest unusual torsions). '
+                                 'WARNING: Relative mode may behave unexpectedly with conformers '
+                                 'from multiple input molecules (default: %(default)s)')
+        parser.add_argument('-l', '--limit',
+                            dest='torsion_limit',
+                            type=int,
+                            default=0,
+                            metavar='<limit>',
+                            help='Maximum number of unusual torsions for a passing molecule '
+                                 '(default: %(default)s)')
+        parser.add_argument('-d', '--local-density',
+                            dest='local_density_threshold',
+                            type=float,
+                            default=10.0,
+                            metavar='<threshold>',
+                            help='Local density threshold for classifying a torsion as unusual '
+                                 '(default: %(default)s)')
+        parser.add_argument('--incl-organometallics',
+                            dest='incl_organometallics',
+                            action='store_true',
+                            help='Include organometallic compounds in the search '
+                                 '(default: organic compounds only)')
+        parser.add_argument('--generalisation',
+                            action='store_true',
+                            help='Turn on generalisation for searches')
+        parser.add_argument('--successfn',
+                            default='successes.mol',
+                            metavar='<file>',
+                            help='Output file for molecules that pass the filter '
+                                 '(default: %(default)s)')
+        parser.add_argument('--failurefn',
+                            default='failures.mol',
+                            metavar='<file>',
+                            help='Output file for molecules that fail the filter '
+                                 '(default: %(default)s)')
+        parser.add_argument('-u', '--unusual-torsions',
+                            dest='unusualtorsionfn',
+                            default='unusual_torsions.csv',
+                            metavar='<file>',
+                            help='Output CSV file for unusual torsion details '
+                                 '(default: %(default)s)')
+        return parser.parse_args()
+    def create_mogul_engine(local_density_threshold, incl_organometallics, generalisation):
+        """Create and configure a geometry analyser engine.
+        Args:
+            local_density_threshold: Threshold for classifying torsions as unusual
+            incl_organometallics: Whether to include organometallic compounds
+            generalisation: Whether to enable generalisation for searches
+        Returns:
+            Configured ccdc.conformer.GeometryAnalyser instance
+        """
+        engine = conformer.GeometryAnalyser()
+        engine.settings.bond.analyse = False
+        engine.settings.angle.analyse = False
+        engine.settings.ring.analyse = False
+        engine.settings.torsion.local_density_threshold = local_density_threshold
+        engine.settings.generalisation = generalisation
+        engine.settings.organometallic_filter = 'all' if incl_organometallics else 'organics_only'
+        return engine
+    def analysis(torsion_limit, input_filename, mode, engine, success_file, failure_file, unusual_torsion_file):
+        """Analyze molecules for unusual torsions and filter based on criteria.
+        Args:
+            torsion_limit: Maximum number of unusual torsions allowed
+            input_filename: Path to input molecule file
+            mode: 'absolute' or 'relative' filtering mode
+            engine: Configured GeometryAnalyser instance
+            success_file: Path to output file for passing molecules
+            failure_file: Path to output file for failing molecules
+            unusual_torsion_file: Path to CSV file for unusual torsion details
+        """
+        # Analyze all molecules and collect unusual torsion data
+        molecules = []
+        min_unusual_torsions = float('inf')
+        with io.MoleculeReader(input_filename) as mol_reader:
+            for molecule in mol_reader:
+                molecule.standardise_aromatic_bonds()
+                molecule.standardise_delocalised_bonds()
+                geometry_analysed_molecule = engine.analyse_molecule(molecule)
+                molecule.unusual_torsions = [
+                    t for t in geometry_analysed_molecule.analysed_torsions
+                    if t.unusual and t.enough_hits
+                ]
+                molecule.num_unusual_torsions = len(molecule.unusual_torsions)
+                molecules.append(molecule)
+                min_unusual_torsions = min(min_unusual_torsions, molecule.num_unusual_torsions)
+        # Write results
+        with io.MoleculeWriter(success_file) as passed_writer, \
+             io.MoleculeWriter(failure_file) as failed_writer, \
+             open(unusual_torsion_file, 'w', newline='') as csv_file:
+            csv_writer = csv.writer(csv_file)
+            csv_writer.writerow(['MoleculeIndex', 'Value', 'Zscore', 'LocalDensity', 'NumHits', 'Atoms'])
+            for idx, molecule in enumerate(molecules):
+                threshold = torsion_limit if mode == 'absolute' else min_unusual_torsions + torsion_limit
+                failed = molecule.num_unusual_torsions > threshold
+                if failed:
+                    failed_writer.write(molecule)
+                    for torsion in molecule.unusual_torsions:
+                        csv_writer.writerow([
+                            idx,
+                            torsion.value,
+                            torsion.z_score,
+                            torsion.local_density,
+                            torsion.nhits,
+                            ' '.join(torsion.atom_labels)
+                        ])
+                else:
+                    passed_writer.write(molecule)
+    def run():
+        """Main entry point for the script."""
+        args = parse_args()
+        if args.torsion_limit < 0:
+            raise ValueError('Torsion limit must be >= 0')
+        engine = create_mogul_engine(
+            args.local_density_threshold,
+            args.incl_organometallics,
+            args.generalisation
+        )
+        analysis(
+            args.torsion_limit,
+            args.inmolfn,
+            args.mode,
+            engine,
+            args.successfn,
+            args.failurefn,
+            args.unusualtorsionsfn,
+        )
+    if __name__ == '__main__':
+        run()

-Original file line number
+Diff line change
@@ -0,0 +1,5 @@
+    # Filter Poses
+    This is a short script to filter molecular poses in a multi-molecule file based on the torsion probabilities.
+    CCDC Python API Licence required, minimum version: 3.0.15

-Original file line number
+Diff line change
@@ -0,0 +1,189 @@
+    #!/usr/bin/env python3
+    #
+    # This script can be used for any purpose without limitation subject to the
+    # conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
+    #
+    # This permission notice and the following statement of attribution must be
+    # included in all copies or substantial portions of this script.
+    #
+    # 2025-02-03: created by the Cambridge Crystallographic Data Centre
+    import argparse
+    import copy
+    import csv
+    import math
+    from ccdc import conformer, io
+    def parse_args():
+        """Parse command line arguments."""
+        parser = argparse.ArgumentParser(description=__doc__)
+        parser.add_argument('conformer_file',
+                            metavar='<input file>',
+                            help='Input file (multi-molecule file)')
+        parser.add_argument('-csv', '--write-csv',
+                            dest='write_csv',
+                            action='store_true',
+                            help='Write a csv file for all the analysed conformers.')
+        return parser.parse_args()
+    class ProbabilityScorer:
+        """
+        Use the ConformerGenerator and GeometryAnalyser to score conformers based on their conformational
+        probabilities and unusual torsions.
+        """
+        def __init__(self, user_conformer_generator_settings=None, skip_minimisation=True,
+                     user_mogul_analysis_settings=None):
+            self._generator = self._create_conformer_generator(user_conformer_generator_settings, skip_minimisation)
+            self._mogul_analysis_engine = self._create_analyser(user_mogul_analysis_settings)
+        def _create_analyser(self, user_mogul_analyser_settings):
+            """Create a GeometryAnalyser engine to analyse the conformers."""
+            engine = conformer.GeometryAnalyser()
+            # Tweak the 'default defaults'
+            # By default, in this use case, we do not want to use generalisation.
+            engine.settings.generalisation = False
+            # By default, only the organic subset, i.e. exclude organometallic
+            engine.settings.organometallic_filter = 'Organic'
+            # Ensure user settings are kept:
+            if user_mogul_analyser_settings is not None:
+                engine.settings = copy.copy(user_mogul_analyser_settings)
+            # Only analyse torsions:
+            engine.settings.bond.analyse = False
+            engine.settings.angle.analyse = False
+            engine.settings.ring.analyse = False
+            return engine
+        def _create_conformer_generator(self, user_generator_settings, skip_minimisation=True):
+            """Create a ConformerGenerator engine to generate conformers for the molecules."""
+            settings = conformer.ConformerSettings()
+            if user_generator_settings is not None:
+                settings = copy.copy(user_generator_settings)
+            # Mandatory setting here: this must work in use_input_torsion_distributions mode.
+            settings.use_input_torsion_distributions = True
+            engine = conformer.ConformerGenerator(settings, skip_minimisation)
+            return engine
+        @staticmethod
+        def _combined_local_density(all_local_densities):
+            if all_local_densities:
+                return sum(all_local_densities) / len(all_local_densities)
+            return 100.0
+        def probability_analysis(self, molecule, bad_normalised_score=1.0, bad_probability_score=0.0,
+                                 bad_rmsd_score=9999.9):
+            # Approximation excluding rings:
+            is_rigid = sum(bond.is_rotatable for bond in molecule.bonds) == 0
+            conformers = self._generator.generate(molecule)
+            normalised_score = None
+            rmsd = None
+            ln_probability = None
+            if conformers:
+                # First conformer is the most likely, so return its score.
+                normalised_score = conformers[0].normalised_score
+                rmsd = conformers[0].rmsd()
+                ln_probability = conformers[0].probability
+                if is_rigid:
+                    if ln_probability is None:
+                        ln_probability = 0.0
+                    if normalised_score is None:
+                        normalised_score = 0.0
+            if normalised_score is None:
+                normalised_score = bad_normalised_score
+            if rmsd is None:
+                rmsd = bad_rmsd_score
+            probability = bad_probability_score
+            if ln_probability is not None:
+                probability = math.exp(ln_probability)
+            return normalised_score, probability, rmsd
+        def unusual_torsions_analysis(self, molecule, max_hist_size=15):
+            checked_mol = self._mogul_analysis_engine.analyse_molecule(molecule)
+            unusual_count = 0
+            local_densities = []
+            unusual_local_densities = []
+            no_data_torsions = 0
+            for tor in checked_mol.analysed_torsions:
+                hist_size = sum(tor.histogram())
+                if hist_size > max_hist_size:
+                    local_densities.append(tor.local_density)
+                    if tor.unusual:
+                        unusual_local_densities.append(tor.local_density)
+                        unusual_count += 1
+                else:
+                    no_data_torsions += 1
+                    # Expected number of torsions within 10 degrees assuming even distribution of observations.
+                    # Number of Mogul bins would be strictly better than assuming 18.
+                    uniform_torsion_dist = 100 / 18
+                    local_densities.append(uniform_torsion_dist)
+            return unusual_count, self._combined_local_density(local_densities), self._combined_local_density(
+                unusual_local_densities), no_data_torsions
+        def process_molecule(self, molecule):
+            molecule.remove_unknown_atoms()
+            molecule.assign_bond_types()
+            unusual_count, avg_local_density, avg_unusual_local_density, no_data_torsions = self.unusual_torsions_analysis(
+                molecule)
+            normalised_score, probability, rmsd = self.probability_analysis(molecule)
+            return {
+                "identifier": molecule.identifier,
+                "number of unusual torsions": unusual_count,
+                "average local density": avg_local_density,
+                "average unusual local density": avg_unusual_local_density,
+                "number of torsions with no data": no_data_torsions,
+                "normalised probability score": normalised_score,
+                "probability score": probability,
+                "RMSD to input conformation": rmsd,
+            }
+    def run():
+        args = parse_args()
+        p = ProbabilityScorer()
+        all_data = []
+        with io.MoleculeReader(args.conformer_file) as reader:
+            for i, molecule in enumerate(reader):
+                data = p.process_molecule(molecule)
+                if i == 0:
+                    print(", ".join(data.keys()))
+                print(", ".join(str(value) for value in data.values()))
+                all_data.append(data)
+        if args.write_csv:
+            keys = all_data[0].keys()
+            with open('filtered_poses_analysis.csv', 'w', newline='') as output_file:
+                dict_writer = csv.DictWriter(output_file, keys)
+                dict_writer.writeheader()
+                dict_writer.writerows(all_data)
+    if __name__ == "__main__":
+        run()

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

NO_JIRA Adding two scripts from ccdc confidential for conformer analysis. Ori… #91

Diff view

Diff view

There are no files selected for viewing

Alex-AMC Feb 4, 2026

Uh oh!

Alex-AMC Feb 4, 2026

Uh oh!

Uh oh!

NO_JIRA Adding two scripts from ccdc confidential for conformer analysis. Ori… #91

NO_JIRA Adding two scripts from ccdc confidential for conformer analysis. Ori… #91

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Alex-AMC Feb 4, 2026

Choose a reason for hiding this comment

Uh oh!

Alex-AMC Feb 4, 2026

Choose a reason for hiding this comment

Uh oh!

Uh oh!