InChI
 
Loading...
Searching...
No Matches
util.h
Go to the documentation of this file.
1/*
2 * International Chemical Identifier (InChI)
3 * Version 1
4 * Software version 1.07
5 * April 30, 2024
6 *
7 * MIT License
8 *
9 * Copyright (c) 2024 IUPAC and InChI Trust
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a copy
12 * of this software and associated documentation files (the "Software"), to deal
13 * in the Software without restriction, including without limitation the rights
14 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15 * copies of the Software, and to permit persons to whom the Software is
16 * furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice shall be included in all
19 * copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 * SOFTWARE.
28 *
29 * The InChI library and programs are free software developed under the
30 * auspices of the International Union of Pure and Applied Chemistry (IUPAC).
31 * Originally developed at NIST.
32 * Modifications and additions by IUPAC and the InChI Trust.
33 * Some portions of code were developed/changed by external contributors
34 * (either contractor or volunteer) which are listed in the file
35 * 'External-contributors' included in this distribution.
36 *
37 * info@inchi-trust.org
38 *
39 */
40
41#ifndef _UTIL_H_
42#define _UTIL_H_
43
44#include "inpdef.h"
45
46#define EL_NUMBER_H ((U_CHAR)1)
47#define EL_NUMBER_B ((U_CHAR)5)
48#define EL_NUMBER_C ((U_CHAR)6)
49#define EL_NUMBER_N ((U_CHAR)7)
50#define EL_NUMBER_O ((U_CHAR)8)
51#define EL_NUMBER_F ((U_CHAR)9)
52#define EL_NUMBER_SI ((U_CHAR)14)
53#define EL_NUMBER_P ((U_CHAR)15)
54#define EL_NUMBER_S ((U_CHAR)16)
55#define EL_NUMBER_CL ((U_CHAR)17)
56#define EL_NUMBER_GE ((U_CHAR)32)
57#define EL_NUMBER_AS ((U_CHAR)33)
58#define EL_NUMBER_SE ((U_CHAR)34)
59#define EL_NUMBER_BR ((U_CHAR)35)
60#define EL_NUMBER_SB ((U_CHAR)51)
61#define EL_NUMBER_TE ((U_CHAR)52)
62#define EL_NUMBER_I ((U_CHAR)53)
63#define EL_NUMBER_PO ((U_CHAR)84)
64#define EL_NUMBER_AT ((U_CHAR)85)
65
66#define EL_NUMBER_ZY ((U_CHAR)119)
67#define EL_NUMBER_ZZ ((U_CHAR)120)
68
69#ifndef COMPILE_ALL_CPP
70#ifdef __cplusplus
71extern "C" {
72#endif
73#endif
74
81int get_atomic_mass(const char *elname);
82
89int get_atomic_mass_from_elnum(int nAtNum);
90
106int get_num_H(const char *elname, int inp_num_H, S_CHAR num_iso_H[], int charge, int radical, int chem_bonds_valence, int atom_input_valence, int bAliased, int bDoNotAddH, int bHasMetalNeighbor);
107
116int extract_charges_and_radicals(char *elname, int *pnRadical, int *pnCharge);
117
125int extract_H_atoms(char *elname, S_CHAR num_iso_H[]);
126
133int normalize_string(char *name);
134
144int read_upto_delim(char **pstring, char *field, int maxlen, char *delims);
145
153int is_matching_any_delim(char c, char *delims);
154
161int dotify_non_printable_chars(char *line);
162
170char *lrtrim(char *p, int *nLen);
171
177void remove_trailing_spaces(char *p);
178
184void remove_one_lf(char *p);
185
196int mystrncpy(char *target, const char *source, unsigned maxlen);
197
203void mystrrev(char *p);
204
205#define ALPHA_BASE 27
206
215int inchi_memicmp(const void *p1, const void *p2, size_t length);
216
224int inchi_stricmp(const char *s1, const char *s2);
225
234char *inchi__strnset(char *s, int val, size_t length);
235
242char *inchi__strdup(const char *string);
243
252long inchi_strtol(const char *str, const char **p, int base);
253
261double inchi_strtod(const char *str, const char **p);
262
271AT_NUMB *is_in_the_list(AT_NUMB *pathAtom, AT_NUMB nNextAtom, int nPathLen);
272
281int *is_in_the_ilist(int *pathAtom, int nNextAtom, int nPathLen);
282
292int is_ilist_inside(int *ilist, int nlist, int *ilist2, int nlist2);
293
301void extract_inchi_substring(char **buf, const char *str, size_t slen);
302
310void extract_auxinfo_substring(char **buf, const char *str, size_t slen);
311
319int extract_orig_nums_from_auxinfo_string(char *saux, int *orig);
320
332int extract_nonstereo_eq_classes_from_auxinfo_string(char *saux, int nat, int *orig, int *have_eclass_info, int *eclass, int *eclass_by_origs);
333
343int extract_stereo_info_from_inchi_string(char *sinchi, int nat, int *orig, int *at_stereo_mark);
344
354int extract_all_backbone_bonds_from_inchi_string(char *sinchi, int *n_all_bkb_orig, int *orig, int *all_bkb_orig);
355
362int get_periodic_table_number(const char *elname);
363
370int is_el_a_metal(int nPeriodicNum);
371
380int get_el_valence(int nPeriodicNum, int charge, int val_num);
381
393int get_unusual_el_valence(int nPeriodicNum, int charge, int radical, int bonds_valence, int num_H, int num_bonds);
394
395/* Output valence that does not fit any known valences */
396
408int detect_unusual_el_valence(int nPeriodicNum, int charge, int radical, int bonds_valence, int num_H, int num_bonds);
409
422int needed_unusual_el_valence(int nPeriodicNum, int charge, int radical, int bonds_valence, int actual_bonds_val, int num_H, int num_bonds);
423
430int get_el_type(int nPeriodicNum);
431
438int if_skip_add_H(int nPeriodicNum);
439
447int get_element_chemical_symbol(int nAtNum, char *szElement);
448
456int get_element_or_pseudoelement_symbol(int nAtNum, char *szElement);
457
469int MakeRemovedProtonsString(int nNumRemovedProtons, NUM_H *nNumExchgIsotopicH, NUM_H *nNumRemovedProtonsIsotopic, int bIsotopic, char *szRemovedProtons, int *num_removed_iso_H);
470
471/*
472 Ion pairs and fixing bonds
473*/
474
482int num_of_H(inp_ATOM *at, int iat);
483
490U_CHAR ion_el_group(int el);
491
500int has_other_ion_neigh(inp_ATOM *at, int iat, int iat_ion_neigh);
501
510int has_other_ion_in_sphere_2(inp_ATOM *at, int iat, int iat_ion_neigh);
511
519int nNoMetalNumBonds(inp_ATOM *at, int at_no);
520
528int nNoMetalBondsValence(inp_ATOM *at, int at_no);
529
537int nNoMetalNeighIndex(inp_ATOM *at, int at_no);
538
547int nNoMetalOtherNeighIndex(inp_ATOM *at, int at_no, int cur_neigh);
548
558int nNoMetalOtherNeighIndex2(inp_ATOM *at, int at_no, int cur_neigh, int cur_neigh2);
559
567int nBondsValToMetal(inp_ATOM *at, int iat);
568
577int nBondsValenceInpAt(const inp_ATOM *at, int *nNumAltBonds, int *nNumWrongBonds);
578
586int bHeteroAtomMayHaveXchgIsoH(inp_ATOM *atom, int iat);
587
594int get_endpoint_valence(U_CHAR el_number);
595#if (KETO_ENOL_TAUT == 1)
596
603int get_endpoint_valence_KET(U_CHAR el_number);
604#endif
605
606#define MIN_ATOM_CHARGE (-2)
607#define MAX_ATOM_CHARGE 2
608#define NEUTRAL_STATE (-MIN_ATOM_CHARGE)
609#define NUM_ATOM_CHARGES (MAX_ATOM_CHARGE - MIN_ATOM_CHARGE + 1)
610#define MAX_NUM_VALENCES 5 /* max. number + 1 to provide zero termination */
611
612/* CHEMICAL ELEMENTS & ATOMIC VALENCE MODEL
613 FOR VARIOUS OXIDATION STATES
614*/
615typedef struct tagElData
616{
617 /* Element chemical symbol */
618 const char* szElName;
619 /* Average atomic mass from the Periodic Chart of the Elements
620 (Fisher cat. no. 05-702-10) */
622 /* (not used currently) Atomic mass of the most abundant isotope */
624 /* (not used currently) Exact mw of the most abundant isotope (not used) */
625 double dAtMass;
626 /* METAL or METAL2 */
627 int nType;
628 /* (not used currently) Pauling electronegativity x 10; 0 means unknown */
630 /* InChI does not add implicit H to atoms that have non-zero bSkipAddingH */
631 /* NB: was called bDoNotAddH, renamed to avoid confusion with other procedures */
635
636/* Forward declaration */
637struct tagCANON_GLOBALS;
638
645int SetBitFree(struct tagCANON_GLOBALS *pCG);
646
653void WriteCoord(char *str, double x);
654extern const int ERR_ELEM;
655extern const int nElDataLen;
656
657#ifndef COMPILE_ALL_CPP
658#ifdef __cplusplus
659}
660#endif
661#endif
662
663#ifndef INCHI_BUILD_PLATFORM
664
665#if defined(_WIN32)
666
667#if defined(_WIN64)
668#define INCHI_BUILD_PLATFORM "Windows 64-bit"
669#else
670#define INCHI_BUILD_PLATFORM "Windows 32-bit"
671#endif
672
673#elif defined(__linux__)
674
675#if defined(__x86_64__) || defined(__ppc64__) || defined(__aarch64__) /* djb-rwth: macro added for 64-bit ARM CPUs -- GH issue #10, thanks to Vincent F. Scalfani */
676#define INCHI_BUILD_PLATFORM "Linux 64-bit"
677#else
678#define INCHI_BUILD_PLATFORM "Linux 32-bit"
679#endif
680
681#elif defined(__APPLE__)
682#define INCHI_BUILD_PLATFORM "OSX"
683
684#else
685#define INCHI_BUILD_PLATFORM ""
686#endif
687#endif
688
689#ifndef INCHI_BUILD_DEBUG
690#if defined(_DEBUG)
691#define INCHI_BUILD_DEBUG " Debug"
692#else
693#define INCHI_BUILD_DEBUG ""
694#endif
695#endif
696
697#ifndef INCHI_SRC_REV
698#if defined(_DEBUG)
699#define INCHI_SRC_REV "rev. 9b6f1414ebf3+"
700#else
701#define INCHI_SRC_REV ""
702#endif
703#endif
704
705#ifndef INCHI_BUILD_COMPILER
706
707#if defined(_MSC_VER)
708
709#if _MSC_VER > 1900
710#define INCHI_BUILD_COMPILER "MS VS 2017 or later"
711#elif _MSC_VER == 1900
712#define INCHI_BUILD_COMPILER "MS VS 2015"
713#elif _MSC_VER == 1800
714#define INCHI_BUILD_COMPILER "MS VS 2013"
715#elif _MSC_VER == 1700
716#define INCHI_BUILD_COMPILER "MS VS 2012"
717#elif _MSC_VER == 1600
718#define INCHI_BUILD_COMPILER "MS VS 2010"
719#elif _MSC_VER == 1500
720#define INCHI_BUILD_COMPILER "MS VS 2008"
721#elif _MSC_VER == 1400
722#define INCHI_BUILD_COMPILER "MS VS 2005"
723#elif _MSC_VER == 1310
724#define INCHI_BUILD_COMPILER "MS VS 2003"
725#elif _MSC_VER == 1300
726#define INCHI_BUILD_COMPILER "MS VS 2002"
727#elif _MSC_VER == 1200
728#define INCHI_BUILD_COMPILER "MS VS 6.0"
729#else
730#define INCHI_BUILD_COMPILER "MS VC++ 5.0 or earlier"
731#endif
732
733#else
734
735#if defined(__GNUC__)
736#define INCHI_BUILD_COMPILER "gcc " __VERSION__ ""
737#else
738#define INCHI_BUILD_COMPILER ""
739#endif
740#endif
741
742#endif
743
744#endif /* _UTIL_H_ */
unsigned short AT_NUMB
Definition ichisize.h:45
signed short NUM_H
Definition ichisize.h:49
signed char S_CHAR
Definition inchi_api.h:113
unsigned char U_CHAR
Definition inchi_api.h:114
Definition ichicant.h:338
Definition util.h:616
double dAtMass
Definition util.h:625
int nNormAtMass
Definition util.h:623
int nElNegPauling10
Definition util.h:629
const char * szElName
Definition util.h:618
int nAtMass
Definition util.h:621
int bSkipAddingH
Definition util.h:632
S_CHAR cValence[NUM_ATOM_CHARGES][MAX_NUM_VALENCES]
Definition util.h:633
int nType
Definition util.h:627
Structure describing an input atom.
Definition inpdef.h:142
int num_of_H(inp_ATOM *at, int iat)
Get the number of hydrogens.
Definition util.c:1096
int get_element_chemical_symbol(int nAtNum, char *szElement)
Finds chemical symbol for element of given number.
Definition util.c:257
int get_el_type(int nPeriodicNum)
Get the element type.
Definition util.c:647
int extract_H_atoms(char *elname, S_CHAR num_iso_H[])
Extract H atoms from element name.
Definition util.c:742
char * inchi__strnset(char *s, int val, size_t length)
Set a string to a specified value for a given length.
Definition util.c:1989
AT_NUMB * is_in_the_list(AT_NUMB *pathAtom, AT_NUMB nNextAtom, int nPathLen)
Checks if an atom is in the list/path.
Definition util.c:1027
int get_num_H(const char *elname, int inp_num_H, S_CHAR num_iso_H[], int charge, int radical, int chem_bonds_valence, int atom_input_valence, int bAliased, int bDoNotAddH, int bHasMetalNeighbor)
Get the number of attached hydrogens.
Definition util.c:830
int detect_unusual_el_valence(int nPeriodicNum, int charge, int radical, int bonds_valence, int num_H, int num_bonds)
Output valence that does not fit any known valences.
Definition util.c:588
int if_skip_add_H(int nPeriodicNum)
Check if no H addition allowed.
Definition util.c:396
int is_el_a_metal(int nPeriodicNum)
Check if an element is a metal.
Definition util.c:656
int extract_charges_and_radicals(char *elname, int *pnRadical, int *pnCharge)
Extract charges and radicals from element name.
Definition util.c:668
void extract_inchi_substring(char **buf, const char *str, size_t slen)
Extract InChI substring embedded into a longer string.
Definition util.c:1827
void WriteCoord(char *str, double x)
Write coordinate (double) to string.
int extract_orig_nums_from_auxinfo_string(char *saux, int *orig)
Parse AuxInfo string and get a list of original atom numbers orig[cano_num].
Definition runichi2.c:1389
int nNoMetalNeighIndex(inp_ATOM *at, int at_no)
Get the index of the first element that is not a metal.
Definition util.c:1353
int get_atomic_mass(const char *elname)
Get the atomic mass for an element name.
Definition util.c:1008
int mystrncpy(char *target, const char *source, unsigned maxlen)
Copies up to maxlen characters INCLUDING end null from source to target. Fills out the rest of the ta...
Definition util.c:1727
int has_other_ion_in_sphere_2(inp_ATOM *at, int iat, int iat_ion_neigh)
Check whether an atom has ion neighbors within sphere (breadth-first search (BFS) up to r=2)
Definition util.c:1168
double inchi_strtod(const char *str, const char **p)
Convert string to double.
Definition ichiprt2.c:2606
int inchi_stricmp(const char *s1, const char *s2)
Case-insensitive string comparison.
Definition util.c:1962
int nBondsValenceInpAt(const inp_ATOM *at, int *nNumAltBonds, int *nNumWrongBonds)
Gets the number of bond valences.
Definition ichi_bns.c:1672
void remove_one_lf(char *p)
Remove one line feed character from the end of a string.
Definition util.c:1707
char * lrtrim(char *p, int *nLen)
Trim leading and trailing spaces from a string.
Definition util.c:1771
int extract_stereo_info_from_inchi_string(char *sinchi, int nat, int *orig, int *at_stereo_mark)
Extract stereo information from InChI string.
Definition ichiread.c:12499
#define NUM_ATOM_CHARGES
Definition util.h:609
int get_unusual_el_valence(int nPeriodicNum, int charge, int radical, int bonds_valence, int num_H, int num_bonds)
Output valence needed to unambiguously reconstruct bonds.
Definition util.c:422
int nNoMetalOtherNeighIndex(inp_ATOM *at, int at_no, int cur_neigh)
Get the index of an element that is not a metal excluding a given index.
Definition util.c:1372
int extract_all_backbone_bonds_from_inchi_string(char *sinchi, int *n_all_bkb_orig, int *orig, int *all_bkb_orig)
Extract all backbone bonds from InChI string.
Definition ichiread.c:12547
int get_endpoint_valence_KET(U_CHAR el_number)
Get the endpoint valence (KET variant, available when KETO_ENOL_TAUT == 1).
Definition util.c:1497
const int nElDataLen
Definition util.c:244
int nBondsValToMetal(inp_ATOM *at, int iat)
Gets the number of bond valences to a metal atom.
Definition util.c:1067
int get_el_valence(int nPeriodicNum, int charge, int val_num)
Get reference value of atom valence at given charge.
Definition util.c:407
#define MAX_NUM_VALENCES
Definition util.h:610
long inchi_strtol(const char *str, const char **p, int base)
Convert string to long integer.
Definition ichiprt2.c:2587
int is_matching_any_delim(char c, char *delims)
Check if a character is in the list of possible delimiters.
Definition util.c:1677
int * is_in_the_ilist(int *pathAtom, int nNextAtom, int nPathLen)
Checks if an integer is in the list/path.
Definition util.c:1039
int get_endpoint_valence(U_CHAR el_number)
Get the endpoint valence.
Definition util.c:1476
int nNoMetalOtherNeighIndex2(inp_ATOM *at, int at_no, int cur_neigh, int cur_neigh2)
Get the index of an element that is not a metal excluding two given indices.
Definition util.c:1393
int extract_nonstereo_eq_classes_from_auxinfo_string(char *saux, int nat, int *orig, int *have_eclass_info, int *eclass, int *eclass_by_origs)
Parse AuxInfo string and get non-stereo equivalence classes.
Definition runichi2.c:1427
char * inchi__strdup(const char *string)
Duplicate a string.
Definition util.c:2002
U_CHAR ion_el_group(int el)
Get the element group of an element. The base element rather than the periodic group is used to aid r...
Definition util.c:1118
int is_ilist_inside(int *ilist, int nlist, int *ilist2, int nlist2)
Checks if one list of integers is inside another list (ilist in ilist2)
Definition util.c:1052
int get_periodic_table_number(const char *elname)
Get the periodic table number for an element name.
Definition util.c:332
int nNoMetalBondsValence(inp_ATOM *at, int at_no)
Returns the number of non-metal bond valences.
Definition util.c:1287
void remove_trailing_spaces(char *p)
Remove trailing spaces from a string.
Definition util.c:1695
int read_upto_delim(char **pstring, char *field, int maxlen, char *delims)
Read up to any delimiter from the string.
Definition util.c:1625
int has_other_ion_neigh(inp_ATOM *at, int iat, int iat_ion_neigh)
Check whether an atom has ion neighbors.
Definition util.c:1142
int get_atomic_mass_from_elnum(int nAtNum)
Get the atomic mass from the element number.
Definition util.c:975
int SetBitFree(struct tagCANON_GLOBALS *pCG)
Frees bit string in canonicalisation data structure.
Definition ichican2.c:3259
const int ERR_ELEM
Definition util.c:243
int needed_unusual_el_valence(int nPeriodicNum, int charge, int radical, int bonds_valence, int actual_bonds_val, int num_H, int num_bonds)
Output valence needed to unambiguously reconstruct number of H.
Definition util.c:486
int bHeteroAtomMayHaveXchgIsoH(inp_ATOM *atom, int iat)
Checks whether a hetero atom may have exchangeable isotopic hydrogens.
Definition strutil.c:6743
int nNoMetalNumBonds(inp_ATOM *at, int at_no)
Returns the number of non-metal bonds.
Definition util.c:1220
struct tagElData ELDATA
int MakeRemovedProtonsString(int nNumRemovedProtons, NUM_H *nNumExchgIsotopicH, NUM_H *nNumRemovedProtonsIsotopic, int bIsotopic, char *szRemovedProtons, int *num_removed_iso_H)
Creates a string from the removed protons (?)
void extract_auxinfo_substring(char **buf, const char *str, size_t slen)
Extract AuxInfo substring embedded into a longer string.
Definition util.c:1888
int inchi_memicmp(const void *p1, const void *p2, size_t length)
Compare two memory blocks in a case-insensitive manner.
Definition util.c:1938
int dotify_non_printable_chars(char *line)
Replace non-ASCII characters with '.'.
Definition util.c:1597
void mystrrev(char *p)
Reverse a string in place.
Definition ichimake.c:2089
int get_element_or_pseudoelement_symbol(int nAtNum, char *szElement)
Finds symbol for element of given number. Accounts for (translates)pseudoelements.
Definition util.c:284
int normalize_string(char *name)
Normalize string (remove leading & trailing spaces, replace consecutive spaces with a single space,...
Definition util.c:1556