;; Inferring electron dot structures using logic programming
(ns ai.logical.lewis)
(require '[clojure.core.logic :as logic]
'[clojure.core.logic.fd :as fd])
(def set* (comp clojure.core/set distinct))
;;;;;; -------------------------- CHEMICAL FACTS
(def valence-electrons
"How many valence electrons does this element usually have?"
{:H 1, :C 4, :N 5, :O 6, :F 7,
:P 5 :S 6 :Cl 7})
(defn full-octet
"In this element, how many electrons form a complete octet? (always 8, except for :H and :He)."
[elt]
(cond (= elt :H) 2
(= elt :He) 2
:else 8))
;;;; ----------- CONVERT FORMULA STRINGS INTO LISTS OF ATOMIC AMOUNTS
(defn multiply-vals
"Multiply all the values in the map m by the amount c."
[c m]
(into {} (map (fn [[k v]] [k (* c v)]) m)))
(defn parse-int
"Convert a string representation of a number into that number. If string is empty, use default val instead (default is 1)."
([s default]
(try (Integer/parseInt s)
(catch NumberFormatException e default)))
([s]
(parse-int s 1)))
(defn str-to-compound
"Convert a well-formed molecular formula such as C12(H2O)11 into a compound
object."
[s]
(loop [s s, compounds []]
(if (re-find #"^\s*$" s)
;; base case: the string is finished
(apply merge-with + {} compounds)
(if-let [[_ species amount s-rest]
;; Ordinary species amount like Pb or I2 in PbI2
(re-find #"^\s*([A-Z][a-z]*)(\d*)(.*)" s)]
(recur s-rest
(conj compounds {(keyword species)
(parse-int amount)}))
;; Grouping like the (H2O)11 in C12(H2O)11
(if-let [[_ subcompound suffix s-rest]
(re-find #"^\s*\((.*)\)(\d*)\s*(.*)$" s)]
(recur s-rest
(conj compounds
(multiply-vals (parse-int suffix)
(str-to-compound subcompound))))
(if-let [[_ prefix s-rest]
(re-find #"^\s*(\d+)(.*)" s)]
;; Prefix like the 3 in 3H2O
(recur nil
(conj compounds
(multiply-vals (parse-int prefix)
(str-to-compound s-rest))))))))))
(comment :example
(str-to-compound "C6(H2O)6"))
(defn atoms
"Convert a molecular formula into a list of atoms with unique ids."
[formula-str]
(let [compound (str-to-compound formula-str)]
(for [[species amount] compound
_ (range amount)] ;; for every atom in the compound
{:species species,
:valence (get valence-electrons species)
:full-octet (if (or (= species :H) (= species :He)) 2 8)
:id (gensym "atom")})))
(comment :example
(atoms "H2SO4"))
;;;;;; --------------- LEWIS STRUCTURE REPRESENTATIONS
;; In this program, a Lewis structure is a map whose keys are pairs of atoms.
;; The value of a key [a, b] is the size of the bond between atoms a and b (e.g. 0, 1, 2, 3).
;; (A strength of 0 means the atoms are not connected at all.)
;; As a special case, the key [a, a] (same atom repeated twice) records the number of
;; nonbonding electrons that atom a has.
;; Equivalently, you can think of a Lewis structure as a table/matrix of numbers,
;; with one row and one column for every atom in the molecule.
(defn get-atoms
"Return a list of atoms in the structure."
[structure]
(distinct (apply concat (keys structure))))
(defn get-bond-strengths
"Return the bonds of this atom as a list of integers."
[structure atom]
(for [other-atom (get-atoms structure) :when (not= atom other-atom)]
(get structure [atom other-atom] 0)))
(defn get-nonbonding-electrons
"Return the nonbonding electrons in this atom. A single integer."
[structure atom]
(get structure [atom atom] 0))
(defn formal-charge
"Compute the formal charge of this atom as the difference between its
default valence and the number of bonding/nonbonding electrons it has."
[structure atom]
(apply - (:valence atom)
(get-nonbonding-electrons structure atom)
(get-bond-strengths structure atom)))
(defn absolute-formal-charge
"Compute the absolute formal charge of a lewis structure: the sum of the absolute values of the atom formal charges."
[structure]
(reduce + (map (comp #(Math/abs %) (partial formal-charge structure)) (get-atoms structure))))
(defn distinct-pairs
"Return a list of all distinct pairs [x y] from coll. That is, only one of
[x y] and [y x] may appear in the result."
[coll]
(for [i (range (count coll))
j (range (count coll))
:when (<= i j)]
[(nth coll i) (nth coll j)]))
(defn all-electrons
"Return a list of the electron count at each site in this structure: every
bond (0,1,2,3) and every nonbonding electron count for every atom.
Note that each bond counts twice.
"
[structure]
;; (map (partial get structure)
;; (distinct-pairs (get-atoms structure)))
(map (partial get structure)
(for [a (get-atoms structure),
b (get-atoms structure)]
[a b])))
;; (map (partial pretty-table) (minimals total-formal-charge (lewis-structures "CO3", 0)))
(defn minimals
"Return a list of all the elements x in coll where the value of (f x) is minimal."
[f coll]
(loop [agenda coll, best '(), best-val nil]
(if-let [x (first agenda)]
(let [y (f x)]
(cond
(or (nil? best-val) (< y best-val))
(recur (rest agenda) (conj '() x) y)
(> y best-val)
(recur (rest agenda) best best-val)
(= y best-val)
(recur (rest agenda) (conj best x) best-val)))
best)))
;;;;;; -------------------------- LOGIC PROGRAMMING
;; The sum of two nonnegative integers is 8. What are the integers?
(comment :example
;; https://nextjournal.com/helios/a-core-logic-primer
(let [[a b] [(logic/lvar) (logic/lvar)]]
(logic/run* [q]
(logic/== q [a b])
(logic/everyg #(fd/in % (fd/interval 0 8)) [a b])
(fd/+ a b 8)
)))
(defn sum°
"Require the list of variables to sum to the given amount."
[vars sum]
(logic/fresh [vhead vtail run-sum]
(logic/conde
[(logic/== vars ()) (logic/== sum 0)]
[(logic/conso vhead vtail vars)
(fd/+ vhead run-sum sum)
(sum° vtail run-sum)])))
(defn absent°
"A relation where lst is a collection, such that l doesn't contain x."
[x lst]
(logic/fresh [head tail]
(logic/conde [(logic/== lst '())]
[(logic/conso head tail lst)
(logic/!= head x)
(absent° x tail)])))
(comment :example ;; q is a variable from the interval 1..5 which is missing from the list [3 1 4].
(run* [q] (fd/in q (fd/interval 1 5))
(absent° q [3 1 4])))
;; For debugging purposes. Just add "ignore" to the beginning of a constraint to disable it.
;; For example, (membero x l) becomes (ignore membero x l), a constraint which always succeeds.
(def ignore (constantly logic/succeed))
(defn path-predicate*
[edges]
(let [nodes (distinct (apply concat edges))]
"In a given graph, require that path is a loop-free sequence of edges joining
the start and end nodes. For chemistry, require that the atoms
belong to the same molecule."
(fn path-between°
([path start end]
(path-between° path start end '()))
([path start end seen]
(logic/fresh [tail ttail neighbor]
(logic/conde
;; base case: singleton path
[(logic/membero start nodes)
(logic/== start end)
(logic/conso start '() path)]
;; base case: single edge
[(logic/membero path edges)
(logic/== path [start end])
]
;; recursive case: consecutive nodes are neighbors
[
(logic/membero [start neighbor] edges)
(logic/conso start tail path)
(logic/conso neighbor ttail tail)
(logic/!= ttail '()) ;; recursive cases only (path length > 2)
(absent° start seen)
(absent° neighbor seen)
(path-between° tail neighbor end (distinct (cons start seen)))
]))))))
(do :example
(logic/run* [q] ((path-predicate* [[1 2] [2 3] [3 4] [3 5] [4 6] [5 6] [6 7] [4 2]]) q 1 7))
)
(defn path-predicate
"Given a structure, produce (path-between° path start end) which is true
whenever start and end are connected by a path of nonzero bonds."
[structure]
(let [nodes (distinct (apply concat (keys structure)))]
"In a given graph, require that path is a loop-free sequence of edges joining
the start and end nodes. For chemistry, require that the atoms
belong to the same molecule."
(fn path-between°
([path start end]
(path-between° path start end '()))
([path start end seen]
(logic/fresh [tail ttail neighbor]
(logic/conde
;; base case: singleton path
[(logic/membero start nodes)
(logic/== start end)
(logic/conso start '() path)]
;; base case: single edge
[(logic/== path [start end])
(fd/> (get structure [start end]) 0)
]
;; recursive case: consecutive nodes are neighbors
[
(fd/> (get structure [start neighbor]) 0)
(logic/conso start tail path)
(logic/conso neighbor ttail tail)
(logic/!= ttail '()) ;; recursive cases only (path length > 2)
(absent° start seen)
(absent° neighbor seen)
(path-between° tail neighbor end (distinct (cons start seen)))
]))))))
(defn lewis-structures
"Given a formula string, use logic programming to find a list of all valid
lewis structures. If an ion charge is given, treats the compound as a
charged ion when counting electrons."
([formula-str] (lewis-structures formula-str 0))
([formula-str ion-charge]
(let [atoms (atoms formula-str)
total-valence-count (reduce + (map :valence atoms))
;; A table of variables to be filled in using logic programming.
;; The filled-in result will represent a lewis structure.
lewis-structure-problem
(apply merge
(for [atom-1 atoms
atom-2 atoms]
{[atom-1 atom-2]
(logic/lvar)}))
all-electrons (all-electrons lewis-structure-problem)
same-molecule° (path-predicate lewis-structure-problem)
]
(logic/run* [answer]
(logic/== answer lewis-structure-problem)
;; constraint: the total number of electrons must be equal to the combined number of valence
;; electrons minus the charge of this compound.
(sum° all-electrons
(- total-valence-count ion-charge))
;; constraint: every bond size must range between 0 and 3
(logic/everyg
(fn [bond-size]
(fd/in bond-size (fd/interval 0 3)))
(for [atom-1 atoms
bond (get-bond-strengths lewis-structure-problem atom-1)]
bond))
;; constraint: every atom must have a full octet.
(logic/everyg
#(sum° (concat (get-bond-strengths lewis-structure-problem %)
(get-bond-strengths lewis-structure-problem %)
[(get-nonbonding-electrons lewis-structure-problem %)]
)
(:full-octet %))
atoms)
;; constraint: every atom must have at least one bond.
;; otherwise, our dot structure might result in separate molecules.
(logic/everyg
(fn [a]
(logic/fresh [total-bond-count]
(sum° (get-bond-strengths lewis-structure-problem a) total-bond-count)
(fd/> total-bond-count 0)))
atoms)
;; constraint: all bonds are covalent: the electrons in bond [a b]
;; are equal to the electrons in bond [b a].
(logic/everyg (fn [[a b]] (logic/== (get lewis-structure-problem [a b])
(get lewis-structure-problem [b a])))
(distinct-pairs (get-atoms lewis-structure-problem)))
;; constraint: practically speaking, we can't have more electrons around any single atom
;; than the total number of electrons available.
(logic/everyg
(fn [bond-size]
(fd/in bond-size (fd/interval 0 (- total-valence-count ion-charge))))
all-electrons)
))))
(defn pretty-table
"Print out a tabular description of a lewis structure."
[structure]
(let [atoms (reverse (sort-by :species (distinct (apply concat (keys structure)))))]
;; HEADER
(println (clojure.string/join (interleave (repeat "\t") (map :species atoms))))
(do (doall
(for [a atoms]
(println (:species a) " " (clojure.string/join (interleave (for [b atoms] (#(if (zero? %) "" %) (get structure [a b]))) (repeat "\t")) ))
)
)
(println (map (partial formal-charge structure) atoms))
(println)
)))
(defn pretty-text
"Print out a verbal description of a lewis structure, as output by the lewis-structures fn."
[structure]
(let [atoms (reverse (sort-by :species (distinct (apply concat (keys structure)))))
nickname
(loop [nicknames {}, tally {}, agenda atoms]
(if-let [atom (first agenda)]
(let [species (:species atom)]
(recur
(assoc nicknames atom (str species " #" (inc (get tally :anything 0))))
(assoc tally :anything (inc (get tally :anything 0)))
(rest agenda)))
nicknames))
]
(println (clojure.string/join "" (repeat 30 "-")) )
(doall
(for [a atoms]
(let [num-bonding-electrons (reduce + (get-bond-strengths structure a))
num-nonbonding-electrons (get-nonbonding-electrons structure a)]
(println "\nAtom" (get nickname a))
(println "\t" (get structure [a a]) " nonbonding electrons.")
(doall (for [b atoms :when (not= a b)]
(println "\t" (get structure [a b]) " bond with " (nickname b))))
(println "\n\t" (+ num-bonding-electrons
num-bonding-electrons
num-nonbonding-electrons)
"valence electrons = "
num-nonbonding-electrons
"nonbonding + "
"2 x" num-bonding-electrons "bonding electrons" )
(println "\t" (formal-charge structure a)
"formal charge = "
(:valence a)
"original valence - "
num-nonbonding-electrons
"nonbonding - "
num-bonding-electrons "bonding electrons" )
)
))))
(do :examples
[
(map pretty-table (minimals absolute-formal-charge (lewis-structures "CO2")))
(map pretty-table (minimals absolute-formal-charge (lewis-structures "N2")))
(map pretty-text (minimals absolute-formal-charge (lewis-structures "CN" -1)))
]
)