diff --git a/examples/asia_bayes_net.py b/examples/asia_bayes_net.py new file mode 100644 index 00000000..82d9e493 --- /dev/null +++ b/examples/asia_bayes_net.py @@ -0,0 +1,80 @@ +# Asia Bayes Net +# Contact: Jacob Schreiber +# jmschr@cs.washington.edu + +''' +The Asia Bayesian Network. See a description here: +http://www.norsys.com/tutorials/netica/secA/tut_A1.htm +''' + +from pomegranate import * + +# Create the distributions +asia = DiscreteDistribution({ 'True' : 0.5, 'False' : 0.5 }) +tuberculosis = ConditionalDiscreteDistribution({ + 'True' : DiscreteDistribution({ 'True' : 0.2, 'False' : 0.80 }), + 'False' : DiscreteDistribution({ 'True' : 0.01, 'False' : 0.99 }) + }, [asia]) + +smoking = DiscreteDistribution({ 'True' : 0.5, 'False' : 0.5 }) +lung = ConditionalDiscreteDistribution({ + 'True' : DiscreteDistribution({ 'True' : 0.75, 'False' : 0.25 }), + 'False' : DiscreteDistribution({ 'True' : 0.02, 'False' : 0.98 }) + }, [smoking] ) +bronchitis = ConditionalDiscreteDistribution({ + 'True' : DiscreteDistribution({ 'True' : 0.92, 'False' : 0.08 }), + 'False' : DiscreteDistribution({ 'True' : 0.03, 'False' : 0.97}) + }, [smoking] ) + +tuberculosis_or_cancer = ConditionalDiscreteDistribution({ + 'True' : { 'True' : DiscreteDistribution({ 'True' : 1.0, 'False' : 0.0 }), + 'False' : DiscreteDistribution({ 'True' : 1.0, 'False' : 0.0 }), + }, + 'False' : { 'True' : DiscreteDistribution({ 'True' : 1.0, 'False' : 0.0 }), + 'False' : DiscreteDistribution({ 'True' : 0.0, 'False' : 1.0 }) + } + }, [tuberculosis, lung] ) + +xray = ConditionalDiscreteDistribution({ + 'True' : DiscreteDistribution({ 'True' : .885, 'False' : .115 }), + 'False' : DiscreteDistribution({ 'True' : 0.04, 'False' : 0.96 }) + }, [tuberculosis_or_cancer] ) + +dyspnea = ConditionalDiscreteDistribution({ + 'True' : { 'True' : DiscreteDistribution({ 'True' : 0.96, 'False' : 0.04 }), + 'False' : DiscreteDistribution({ 'True' : 0.89, 'False' : 0.11 }) + }, + 'False' : { 'True' : DiscreteDistribution({ 
'True' : 0.82, 'False' : 0.18 }), + 'False' : DiscreteDistribution({ 'True' : 0.4, 'False' : 0.6 }) + } + }, [tuberculosis_or_cancer, bronchitis]) + +# Make the states. Note that the name of the state +# can be different than the name of the distribution +s0 = State( asia, name="asia" ) +s1 = State( tuberculosis, name="tuberculosis" ) +s2 = State( smoking, name="smoker" ) +s3 = State( lung, name="cancer" ) +s4 = State( bronchitis, name="bronchitis" ) +s5 = State( tuberculosis_or_cancer, name="TvC" ) +s6 = State( xray, name="xray" ) +s7 = State( dyspnea, name='dyspnea' ) + +# Create the Bayesian network +network = BayesianNetwork( "asia" ) +network.add_states([ s0, s1, s2, s3, s4, s5, s6, s7 ]) +network.add_transition( s0, s1 ) +network.add_transition( s1, s5 ) +network.add_transition( s2, s3 ) +network.add_transition( s2, s4 ) +network.add_transition( s3, s5 ) +network.add_transition( s5, s6 ) +network.add_transition( s5, s7 ) +network.add_transition( s4, s7 ) +network.bake() + +print "Has tuberculosis, is not a smoker, 80-20 chance he has bronchitis" +observations = { 'tuberculosis' : 'True', 'smoker' : 'False', + 'bronchitis' : DiscreteDistribution({ 'True' : 0.8, 'False' : 0.2 }) } +beliefs = map( str, network.forward_backward( observations ) ) +print "\n".join( "{}\t\t{}".format( state.name, belief ) for state, belief in zip( network.states, beliefs ) ) \ No newline at end of file diff --git a/examples/fsm_test.py b/examples/fsm_test.py new file mode 100644 index 00000000..b8426b85 --- /dev/null +++ b/examples/fsm_test.py @@ -0,0 +1,43 @@ +# FSM test +# Contact: Jacob Schreiber +# jmschr@cs.washington.edu + +from pomegranate import * + +# Create the states in the same way as you would an HMM +a = State( NormalDistribution( 5, 1 ), "a" ) +b = State( NormalDistribution( 23, 1 ), "b" ) +c = State( NormalDistribution( 100, 1 ), "c" ) + +# Create a FiniteStateMachine object +model = FiniteStateMachine( "test" ) + +# Add the states in the 
same way +model.add_states( [a, b, c] ) + +# Add the transitions in the same manner +model.add_transition( model.start, a, 1.0 ) +model.add_transition( a, a, 0.33 ) +model.add_transition( a, b, 0.33 ) +model.add_transition( b, b, 0.5 ) +model.add_transition( b, a, 0.5 ) +model.add_transition( a, c, 0.33 ) +model.add_transition( c, a, 0.5 ) +model.add_transition( c, c, 0.5 ) + +# Bake the model in the same way +model.bake( verbose=True ) + +# Take a sequence of observations +seq = [ 5, 5, 5, 5, 23, 23, 5, 23, 23, 100, 23, 23, 23, 23, 5, 5, 100, 5, 23 ] + +# Print out the model +print "\n".join( state.name for state in model.states ) + +# Print out where you start in the model +print model.current_state.name + +# Print out where the model is for each step +for symbol in seq: + model.step( symbol ) + print symbol, model.current_state.name \ No newline at end of file diff --git a/examples/hmm_example.py b/examples/hmm_example.py new file mode 100644 index 00000000..beb6ab02 --- /dev/null +++ b/examples/hmm_example.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python2.7 +# example.py: Yet Another Hidden Markov Model library +# Contact: Jacob Schreiber ( jmschreiber91@gmail.com ) + +""" +A simple example highlighting how to build a model using states, add +transitions, and then run the algorithms, including showing how training +on a sequence improves the probability of the sequence. 
+""" + +import random +from pomegranate import * +from pomegranate import HiddenMarkovModel as Model + +random.seed(0) +model = Model(name="ExampleModel") +distribution = UniformDistribution(0.0, 1.0) +state = State(distribution, name="uniform") +state2 = State(NormalDistribution(0, 2), name="normal") +silent = State(None, name="silent") +model.add_state(state) +model.add_state(state2) + +model.add_transition(state, state, 0.4) +model.add_transition(state, state2, 0.4) +model.add_transition(state2, state2, 0.4) +model.add_transition(state2, state, 0.4) + +model.add_transition(model.start, state, 0.5) +model.add_transition(model.start, state2, 0.5) +model.add_transition(state, model.end, 0.2) +model.add_transition(state2, model.end, 0.2) + +model.bake() +sequence = model.sample() +print sequence +print +print model.forward(sequence)[ len(sequence), model.end_index ] +print model.backward(sequence)[0,model.start_index] +print +trans, ems = model.forward_backward(sequence) +print trans +print ems +print +model.train( [ sequence ] ) + +print +print model.forward(sequence)[ len(sequence), model.end_index ] +print model.backward(sequence)[0,model.start_index] +print +trans, ems = model.forward_backward(sequence) +print trans +print ems +print \ No newline at end of file diff --git a/examples/infinite_hmm.py b/examples/infinite_hmm.py new file mode 100644 index 00000000..4683f286 --- /dev/null +++ b/examples/infinite_hmm.py @@ -0,0 +1,85 @@ +# infinite_hmm_sampling.py +# Contact: Jacob Schreiber +# jmschreiber91@gmail.com + +''' +This example shows how to use pomegranate to sample from an infinite HMM. The premise +is that you have an HMM which does not have transitions to the end state, and +so can continue on forever. This is done by not adding transitions to the end +state. If you bake a model with no transitions to the end state, you get an +infinite model, with no extra work! This change is passed on to all the +algorithms. 
+''' + +from pomegranate import * +from pomegranate import HiddenMarkovModel as Model +import itertools as it +import numpy as np + +# Define the states +s1 = State( NormalDistribution( 5, 2 ), name="S1" ) +s2 = State( NormalDistribution( 15, 2 ), name="S2" ) +s3 = State( NormalDistribution( 25, 2 ), name="S3 ") + +# Define the transitions +model = Model( "infinite" ) +model.add_transition( model.start, s1, 0.7 ) +model.add_transition( model.start, s2, 0.2 ) +model.add_transition( model.start, s3, 0.1 ) +model.add_transition( s1, s1, 0.6 ) +model.add_transition( s1, s2, 0.1 ) +model.add_transition( s1, s3, 0.3 ) +model.add_transition( s2, s1, 0.4 ) +model.add_transition( s2, s2, 0.4 ) +model.add_transition( s2, s3, 0.2 ) +model.add_transition( s3, s1, 0.05 ) +model.add_transition( s3, s2, 0.15 ) +model.add_transition( s3, s3, 0.8 ) +model.bake() + +sequence = [ 4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1 ] + + +print model.is_infinite() + +print "Algorithms On Infinite Model" +sequence = [ 4.8, 5.6, 24.1, 25.8, 14.3, 26.5, 15.9, 5.5, 5.1 ] +print "Forward" +print model.forward( sequence ) + +print "\n".join( state.name for state in model.states ) +print "Backward" +print model.backward( sequence ) + +print "Forward-Backward" +trans, emissions = model.forward_backward( sequence ) +print trans +print emissions + +print "Viterbi" +prob, states = model.viterbi( sequence ) +print "Prob: {}".format( prob ) +print "\n".join( state[1].name for state in states ) +print +print "MAP" +prob, states = model.maximum_a_posteriori( sequence ) +print "Prob: {}".format( prob ) +print "\n".join( state[1].name for state in states ) + +print "Showing that sampling can reproduce the original transition probs." 
+print "Should produce a matrix close to the following: " +print " [ [ 0.60, 0.10, 0.30 ] " +print " [ 0.40, 0.40, 0.20 ] " +print " [ 0.05, 0.15, 0.80 ] ] " +print +print "Tranition Matrix From 100000 Samples:" +sample, path = model.sample( 100000, path=True ) +trans = np.zeros((3,3)) + +for state, n_state in it.izip( path[1:-2], path[2:-1] ): + state_name = float( state.name[1:] )-1 + n_state_name = float( n_state.name[1:] )-1 + trans[ state_name, n_state_name ] += 1 + +trans = (trans.T / trans.sum( axis=1 )).T +print trans \ No newline at end of file diff --git a/examples/logs b/examples/logs new file mode 100644 index 00000000..a85afb0e --- /dev/null +++ b/examples/logs @@ -0,0 +1,595 @@ +Downloading/unpacking pomegranate + Downloading pomegranate-0.0.2.zip + Running setup.py (path:c:\users\jacob\appdata\local\temp\pip_build_Jacob\pomegranate\setup.py) egg_info for package pomegranate + +Requirement already satisfied (use --upgrade to upgrade): cython>=0.20.1 in c:\anaconda\lib\site-packages (from pomegranate) +Requirement already satisfied (use --upgrade to upgrade): numpy>=1.8.0 in c:\anaconda\lib\site-packages (from pomegranate) +Requirement already satisfied (use --upgrade to upgrade): scipy>=0.13.3 in c:\anaconda\lib\site-packages (from pomegranate) +Requirement already satisfied (use --upgrade to upgrade): networkx>=1.8.1 in c:\anaconda\lib\site-packages (from pomegranate) +Requirement already satisfied (use --upgrade to upgrade): matplotlib>=1.3.1 in c:\anaconda\lib\site-packages (from pomegranate) +Installing collected packages: pomegranate + Running setup.py install for pomegranate + cythoning pomegranate/base.pyx to pomegranate\base.c + cythoning pomegranate/bayesnet.pyx to pomegranate\bayesnet.c + cythoning pomegranate/fsm.pyx to pomegranate\fsm.c + cythoning pomegranate/hmm.pyx to pomegranate\hmm.c + building 'pomegranate.pomegranate' extension + C:\Anaconda\Scripts\gcc.bat -DMS_WIN64 -mdll -O -Wall -IC:\Anaconda\lib\site-packages\numpy\core\include 
-IC:\Anaconda\include -IC:\Anaconda\PC -c pomegranate\base.c -o build\temp.win-amd64-2.7\Release\pomegranate\base.o + In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarraytypes.h:1761:0, + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarrayobject.h:17, + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/arrayobject.h:4, + from pomegranate\base.c:347: + C:\Anaconda\lib\site-packages\numpy\core\include/numpy/npy_1_7_deprecated_api.h:12:9: note: #pragma message: C:\Anaconda\lib\site-packages\numpy\core\include/numpy/npy_1_7_deprecated_api.h(12) : Warning Msg: Using deprecated NumPy API, disable it by #defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + pomegranate\base.c: In function '__Pyx_RaiseArgtupleInvalid': + pomegranate\base.c:9786:18: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\base.c:9786:18: warning: format '%s' expects argument of type 'char *', but argument 5 has type 'Py_ssize_t' [-Wformat] + pomegranate\base.c:9786:18: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\base.c:9786:18: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\base.c: In function '__Pyx_RaiseTooManyValuesError': + pomegranate\base.c:10290:18: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\base.c:10290:18: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\base.c: In function '__Pyx_RaiseNeedMoreValuesError': + pomegranate\base.c:10296:18: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\base.c:10296:18: warning: format '%s' expects argument of type 'char *', but argument 3 has type 'Py_ssize_t' [-Wformat] + pomegranate\base.c:10296:18: warning: too many arguments for format [-Wformat-extra-args] + In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarrayobject.h:26:0, + from 
C:\Anaconda\lib\site-packages\numpy\core\include/numpy/arrayobject.h:4, + from pomegranate\base.c:347: + pomegranate\base.c: At top level: + C:\Anaconda\lib\site-packages\numpy\core\include/numpy/__multiarray_api.h:1629:1: warning: '_import_array' defined but not used [-Wunused-function] + In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ufuncobject.h:327:0, + from pomegranate\base.c:348: + C:\Anaconda\lib\site-packages\numpy\core\include/numpy/__ufunc_api.h:241:1: warning: '_import_umath' defined but not used [-Wunused-function] + C:\Anaconda\Scripts\gcc.bat -DMS_WIN64 -mdll -O -Wall -IC:\Anaconda\lib\site-packages\numpy\core\include -IC:\Anaconda\include -IC:\Anaconda\PC -c pomegranate\bayesnet.c -o build\temp.win-amd64-2.7\Release\pomegranate\bayesnet.o + In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarraytypes.h:1761:0, + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarrayobject.h:17, + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/arrayobject.h:4, + from pomegranate\bayesnet.c:347: + C:\Anaconda\lib\site-packages\numpy\core\include/numpy/npy_1_7_deprecated_api.h:12:9: note: #pragma message: C:\Anaconda\lib\site-packages\numpy\core\include/numpy/npy_1_7_deprecated_api.h(12) : Warning Msg: Using deprecated NumPy API, disable it by #defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + pomegranate\bayesnet.c: In function '__Pyx_RaiseTooManyValuesError': + pomegranate\bayesnet.c:21722:18: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\bayesnet.c:21722:18: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\bayesnet.c: In function '__Pyx_RaiseNeedMoreValuesError': + pomegranate\bayesnet.c:21728:18: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\bayesnet.c:21728:18: warning: format '%s' expects argument of type 'char *', but argument 3 has type 'Py_ssize_t' [-Wformat] + 
pomegranate\bayesnet.c:21728:18: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\bayesnet.c: In function '__Pyx_RaiseArgtupleInvalid': + pomegranate\bayesnet.c:21902:18: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\bayesnet.c:21902:18: warning: format '%s' expects argument of type 'char *', but argument 5 has type 'Py_ssize_t' [-Wformat] + pomegranate\bayesnet.c:21902:18: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\bayesnet.c:21902:18: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\bayesnet.c: In function '__Pyx_BufFmt_ProcessTypeChunk': + pomegranate\bayesnet.c:23452:26: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\bayesnet.c:23452:26: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\bayesnet.c:23452:26: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\bayesnet.c:23504:20: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\bayesnet.c:23504:20: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\bayesnet.c:23504:20: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\bayesnet.c: In function '__pyx_buffmt_parse_array': + pomegranate\bayesnet.c:23566:25: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\bayesnet.c:23566:25: warning: format '%d' expects argument of type 'int', but argument 3 has type 'size_t' [-Wformat] + pomegranate\bayesnet.c:23566:25: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\bayesnet.c: In function '__Pyx_GetBufferAndValidate': + pomegranate\bayesnet.c:23747:7: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\bayesnet.c:23747:7: warning: format '%s' expects argument of type 'char *', but argument 3 has type 'Py_ssize_t' [-Wformat] + 
pomegranate\bayesnet.c:23747:7: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\bayesnet.c:23747:7: warning: too many arguments for format [-Wformat-extra-args] + In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarrayobject.h:26:0, + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/arrayobject.h:4, + from pomegranate\bayesnet.c:347: + pomegranate\bayesnet.c: At top level: + C:\Anaconda\lib\site-packages\numpy\core\include/numpy/__multiarray_api.h:1629:1: warning: '_import_array' defined but not used [-Wunused-function] + In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ufuncobject.h:327:0, + from pomegranate\bayesnet.c:348: + C:\Anaconda\lib\site-packages\numpy\core\include/numpy/__ufunc_api.h:241:1: warning: '_import_umath' defined but not used [-Wunused-function] + C:\Anaconda\Scripts\gcc.bat -DMS_WIN64 -mdll -O -Wall -IC:\Anaconda\lib\site-packages\numpy\core\include -IC:\Anaconda\include -IC:\Anaconda\PC -c pomegranate\fsm.c -o build\temp.win-amd64-2.7\Release\pomegranate\fsm.o + In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarraytypes.h:1761:0, + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarrayobject.h:17, + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/arrayobject.h:4, + from pomegranate\fsm.c:347: + C:\Anaconda\lib\site-packages\numpy\core\include/numpy/npy_1_7_deprecated_api.h:12:9: note: #pragma message: C:\Anaconda\lib\site-packages\numpy\core\include/numpy/npy_1_7_deprecated_api.h(12) : Warning Msg: Using deprecated NumPy API, disable it by #defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + pomegranate\fsm.c: In function '__Pyx_RaiseArgtupleInvalid': + pomegranate\fsm.c:26885:18: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\fsm.c:26885:18: warning: format '%s' expects argument of type 'char *', but argument 5 has type 'Py_ssize_t' [-Wformat] + 
pomegranate\fsm.c:26885:18: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\fsm.c:26885:18: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\fsm.c: In function '__Pyx_RaiseTooManyValuesError': + pomegranate\fsm.c:26902:18: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\fsm.c:26902:18: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\fsm.c: In function '__Pyx_RaiseNeedMoreValuesError': + pomegranate\fsm.c:26908:18: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\fsm.c:26908:18: warning: format '%s' expects argument of type 'char *', but argument 3 has type 'Py_ssize_t' [-Wformat] + pomegranate\fsm.c:26908:18: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\fsm.c: In function '__Pyx_BufFmt_ProcessTypeChunk': + pomegranate\fsm.c:27195:26: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\fsm.c:27195:26: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\fsm.c:27195:26: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\fsm.c:27247:20: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\fsm.c:27247:20: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\fsm.c:27247:20: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\fsm.c: In function '__pyx_buffmt_parse_array': + pomegranate\fsm.c:27309:25: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\fsm.c:27309:25: warning: format '%d' expects argument of type 'int', but argument 3 has type 'size_t' [-Wformat] + pomegranate\fsm.c:27309:25: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\fsm.c: In function '__Pyx_GetBufferAndValidate': + pomegranate\fsm.c:27490:7: warning: unknown conversion type character 'z' in format 
[-Wformat] + pomegranate\fsm.c:27490:7: warning: format '%s' expects argument of type 'char *', but argument 3 has type 'Py_ssize_t' [-Wformat] + pomegranate\fsm.c:27490:7: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\fsm.c:27490:7: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\fsm.c: In function '__Pyx_ValidateAndInit_memviewslice': + pomegranate\fsm.c:30229:22: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\fsm.c:30229:22: warning: format '%s' expects argument of type 'char *', but argument 3 has type 'Py_ssize_t' [-Wformat] + pomegranate\fsm.c:30229:22: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\fsm.c:30229:22: warning: too many arguments for format [-Wformat-extra-args] + In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarrayobject.h:26:0, + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/arrayobject.h:4, + from pomegranate\fsm.c:347: + pomegranate\fsm.c: At top level: + C:\Anaconda\lib\site-packages\numpy\core\include/numpy/__multiarray_api.h:1629:1: warning: '_import_array' defined but not used [-Wunused-function] + In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ufuncobject.h:327:0, + from pomegranate\fsm.c:348: + C:\Anaconda\lib\site-packages\numpy\core\include/numpy/__ufunc_api.h:241:1: warning: '_import_umath' defined but not used [-Wunused-function] + C:\Anaconda\Scripts\gcc.bat -DMS_WIN64 -mdll -O -Wall -IC:\Anaconda\lib\site-packages\numpy\core\include -IC:\Anaconda\include -IC:\Anaconda\PC -c pomegranate\hmm.c -o build\temp.win-amd64-2.7\Release\pomegranate\hmm.o + In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarraytypes.h:1761:0, + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarrayobject.h:17, + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/arrayobject.h:4, + from 
pomegranate\hmm.c:347: + C:\Anaconda\lib\site-packages\numpy\core\include/numpy/npy_1_7_deprecated_api.h:12:9: note: #pragma message: C:\Anaconda\lib\site-packages\numpy\core\include/numpy/npy_1_7_deprecated_api.h(12) : Warning Msg: Using deprecated NumPy API, disable it by #defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + pomegranate\hmm.c: In function '__Pyx_RaiseArgtupleInvalid': + pomegranate\hmm.c:49566:18: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\hmm.c:49566:18: warning: format '%s' expects argument of type 'char *', but argument 5 has type 'Py_ssize_t' [-Wformat] + pomegranate\hmm.c:49566:18: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\hmm.c:49566:18: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\hmm.c: In function '__Pyx_BufFmt_ProcessTypeChunk': + pomegranate\hmm.c:49847:26: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\hmm.c:49847:26: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\hmm.c:49847:26: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\hmm.c:49899:20: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\hmm.c:49899:20: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\hmm.c:49899:20: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\hmm.c: In function '__pyx_buffmt_parse_array': + pomegranate\hmm.c:49961:25: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\hmm.c:49961:25: warning: format '%d' expects argument of type 'int', but argument 3 has type 'size_t' [-Wformat] + pomegranate\hmm.c:49961:25: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\hmm.c: In function '__Pyx_GetBufferAndValidate': + pomegranate\hmm.c:50142:7: warning: unknown conversion type character 'z' in format [-Wformat] + 
pomegranate\hmm.c:50142:7: warning: format '%s' expects argument of type 'char *', but argument 3 has type 'Py_ssize_t' [-Wformat] + pomegranate\hmm.c:50142:7: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\hmm.c:50142:7: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\hmm.c: In function '__Pyx_RaiseTooManyValuesError': + pomegranate\hmm.c:50416:18: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\hmm.c:50416:18: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\hmm.c: In function '__Pyx_RaiseNeedMoreValuesError': + pomegranate\hmm.c:50422:18: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\hmm.c:50422:18: warning: format '%s' expects argument of type 'char *', but argument 3 has type 'Py_ssize_t' [-Wformat] + pomegranate\hmm.c:50422:18: warning: too many arguments for format [-Wformat-extra-args] + pomegranate\hmm.c: In function '__Pyx_ValidateAndInit_memviewslice': + pomegranate\hmm.c:53734:22: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\hmm.c:53734:22: warning: format '%s' expects argument of type 'char *', but argument 3 has type 'Py_ssize_t' [-Wformat] + pomegranate\hmm.c:53734:22: warning: unknown conversion type character 'z' in format [-Wformat] + pomegranate\hmm.c:53734:22: warning: too many arguments for format [-Wformat-extra-args] + In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarrayobject.h:26:0, + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/arrayobject.h:4, + from pomegranate\hmm.c:347: + pomegranate\hmm.c: At top level: + C:\Anaconda\lib\site-packages\numpy\core\include/numpy/__multiarray_api.h:1629:1: warning: '_import_array' defined but not used [-Wunused-function] + In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ufuncobject.h:327:0, + from pomegranate\hmm.c:348: + 
C:\Anaconda\lib\site-packages\numpy\core\include/numpy/__ufunc_api.h:241:1: warning: '_import_umath' defined but not used [-Wunused-function] + pomegranate\hmm.c: In function '__pyx_f_11pomegranate_11pomegranate_17HiddenMarkovModel__sample': + pomegranate\hmm.c:10433:18: warning: '__pyx_v_k' may be used uninitialized in this function [-Wmaybe-uninitialized] + C:\Anaconda\Scripts\gcc.bat -DMS_WIN64 -shared -s build\temp.win-amd64-2.7\Release\pomegranate\base.o build\temp.win-amd64-2.7\Release\pomegranate\bayesnet.o build\temp.win-amd64-2.7\Release\pomegranate\fsm.o build\temp.win-amd64-2.7\Release\pomegranate\hmm.o build\temp.win-amd64-2.7\Release\pomegranate\pomegranate.def -LC:\Anaconda\libs -LC:\Anaconda\PCbuild\amd64 -lpython27 -lmsvcr90 -o build\lib.win-amd64-2.7\pomegranate\pomegranate.pyd + build\temp.win-amd64-2.7\Release\pomegranate\bayesnet.o:bayesnet.c:(.text+0x29b84): multiple definition of `initpomegranate' + build\temp.win-amd64-2.7\Release\pomegranate\base.o:base.c:(.text+0x1292e): first defined here + build\temp.win-amd64-2.7\Release\pomegranate\bayesnet.o:bayesnet.c:(.bss+0x0): multiple definition of `__pyx_module_is_main_pomegranate__pomegranate' + build\temp.win-amd64-2.7\Release\pomegranate\base.o:base.c:(.bss+0x0): first defined here + build\temp.win-amd64-2.7\Release\pomegranate\fsm.o:fsm.c:(.text+0x519e5): multiple definition of `initpomegranate' + build\temp.win-amd64-2.7\Release\pomegranate\base.o:base.c:(.text+0x1292e): first defined here + build\temp.win-amd64-2.7\Release\pomegranate\fsm.o:fsm.c:(.bss+0x0): multiple definition of `__pyx_module_is_main_pomegranate__pomegranate' + build\temp.win-amd64-2.7\Release\pomegranate\base.o:base.c:(.bss+0x0): first defined here + build\temp.win-amd64-2.7\Release\pomegranate\hmm.o:hmm.c:(.text+0xd80c9): multiple definition of `initpomegranate' + build\temp.win-amd64-2.7\Release\pomegranate\base.o:base.c:(.text+0x1292e): first defined here + 
build\temp.win-amd64-2.7\Release\pomegranate\hmm.o:hmm.c:(.bss+0x0): multiple definition of `__pyx_module_is_main_pomegranate__pomegranate' + build\temp.win-amd64-2.7\Release\pomegranate\base.o:base.c:(.bss+0x0): first defined here + collect2.exe: error: ld returned 1 exit status + warning: pomegranate\bayesnet.pyx:192:31: Index should be typed for more efficient access + error: command 'gcc' failed with exit status 1 + Complete output from command C:\Anaconda\python.exe -c "import setuptools, tokenize;__file__='c:\\users\\jacob\\appdata\\local\\temp\\pip_build_Jacob\\pomegranate\\setup.py';exec(compile(getattr(tokenize, 'open', open)(__file__).read().replace('\r\n', '\n'), __file__, 'exec'))" install --record c:\users\jacob\appdata\local\temp\pip-6w7uwt-record\install-record.txt --single-version-externally-managed --compile: + running install + +running build + +running build_py + +creating build + +creating build\lib.win-amd64-2.7 + +creating build\lib.win-amd64-2.7\pomegranate + +copying pomegranate\__init__.py -> build\lib.win-amd64-2.7\pomegranate + +running build_ext + +cythoning pomegranate/base.pyx to pomegranate\base.c + +cythoning pomegranate/bayesnet.pyx to pomegranate\bayesnet.c + +cythoning pomegranate/fsm.pyx to pomegranate\fsm.c + +cythoning pomegranate/hmm.pyx to pomegranate\hmm.c + +building 'pomegranate.pomegranate' extension + +creating build\temp.win-amd64-2.7 + +creating build\temp.win-amd64-2.7\Release + +creating build\temp.win-amd64-2.7\Release\pomegranate + +C:\Anaconda\Scripts\gcc.bat -DMS_WIN64 -mdll -O -Wall -IC:\Anaconda\lib\site-packages\numpy\core\include -IC:\Anaconda\include -IC:\Anaconda\PC -c pomegranate\base.c -o build\temp.win-amd64-2.7\Release\pomegranate\base.o + +In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarraytypes.h:1761:0, + + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarrayobject.h:17, + + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/arrayobject.h:4, + 
+ from pomegranate\base.c:347: + +C:\Anaconda\lib\site-packages\numpy\core\include/numpy/npy_1_7_deprecated_api.h:12:9: note: #pragma message: C:\Anaconda\lib\site-packages\numpy\core\include/numpy/npy_1_7_deprecated_api.h(12) : Warning Msg: Using deprecated NumPy API, disable it by #defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + +pomegranate\base.c: In function '__Pyx_RaiseArgtupleInvalid': + +pomegranate\base.c:9786:18: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\base.c:9786:18: warning: format '%s' expects argument of type 'char *', but argument 5 has type 'Py_ssize_t' [-Wformat] + +pomegranate\base.c:9786:18: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\base.c:9786:18: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\base.c: In function '__Pyx_RaiseTooManyValuesError': + +pomegranate\base.c:10290:18: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\base.c:10290:18: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\base.c: In function '__Pyx_RaiseNeedMoreValuesError': + +pomegranate\base.c:10296:18: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\base.c:10296:18: warning: format '%s' expects argument of type 'char *', but argument 3 has type 'Py_ssize_t' [-Wformat] + +pomegranate\base.c:10296:18: warning: too many arguments for format [-Wformat-extra-args] + +In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarrayobject.h:26:0, + + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/arrayobject.h:4, + + from pomegranate\base.c:347: + +pomegranate\base.c: At top level: + +C:\Anaconda\lib\site-packages\numpy\core\include/numpy/__multiarray_api.h:1629:1: warning: '_import_array' defined but not used [-Wunused-function] + +In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ufuncobject.h:327:0, + + from 
pomegranate\base.c:348: + +C:\Anaconda\lib\site-packages\numpy\core\include/numpy/__ufunc_api.h:241:1: warning: '_import_umath' defined but not used [-Wunused-function] + +C:\Anaconda\Scripts\gcc.bat -DMS_WIN64 -mdll -O -Wall -IC:\Anaconda\lib\site-packages\numpy\core\include -IC:\Anaconda\include -IC:\Anaconda\PC -c pomegranate\bayesnet.c -o build\temp.win-amd64-2.7\Release\pomegranate\bayesnet.o + +In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarraytypes.h:1761:0, + + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarrayobject.h:17, + + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/arrayobject.h:4, + + from pomegranate\bayesnet.c:347: + +C:\Anaconda\lib\site-packages\numpy\core\include/numpy/npy_1_7_deprecated_api.h:12:9: note: #pragma message: C:\Anaconda\lib\site-packages\numpy\core\include/numpy/npy_1_7_deprecated_api.h(12) : Warning Msg: Using deprecated NumPy API, disable it by #defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + +pomegranate\bayesnet.c: In function '__Pyx_RaiseTooManyValuesError': + +pomegranate\bayesnet.c:21722:18: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\bayesnet.c:21722:18: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\bayesnet.c: In function '__Pyx_RaiseNeedMoreValuesError': + +pomegranate\bayesnet.c:21728:18: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\bayesnet.c:21728:18: warning: format '%s' expects argument of type 'char *', but argument 3 has type 'Py_ssize_t' [-Wformat] + +pomegranate\bayesnet.c:21728:18: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\bayesnet.c: In function '__Pyx_RaiseArgtupleInvalid': + +pomegranate\bayesnet.c:21902:18: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\bayesnet.c:21902:18: warning: format '%s' expects argument of type 'char *', but argument 5 has type 'Py_ssize_t' 
[-Wformat] + +pomegranate\bayesnet.c:21902:18: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\bayesnet.c:21902:18: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\bayesnet.c: In function '__Pyx_BufFmt_ProcessTypeChunk': + +pomegranate\bayesnet.c:23452:26: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\bayesnet.c:23452:26: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\bayesnet.c:23452:26: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\bayesnet.c:23504:20: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\bayesnet.c:23504:20: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\bayesnet.c:23504:20: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\bayesnet.c: In function '__pyx_buffmt_parse_array': + +pomegranate\bayesnet.c:23566:25: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\bayesnet.c:23566:25: warning: format '%d' expects argument of type 'int', but argument 3 has type 'size_t' [-Wformat] + +pomegranate\bayesnet.c:23566:25: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\bayesnet.c: In function '__Pyx_GetBufferAndValidate': + +pomegranate\bayesnet.c:23747:7: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\bayesnet.c:23747:7: warning: format '%s' expects argument of type 'char *', but argument 3 has type 'Py_ssize_t' [-Wformat] + +pomegranate\bayesnet.c:23747:7: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\bayesnet.c:23747:7: warning: too many arguments for format [-Wformat-extra-args] + +In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarrayobject.h:26:0, + + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/arrayobject.h:4, + + from 
pomegranate\bayesnet.c:347: + +pomegranate\bayesnet.c: At top level: + +C:\Anaconda\lib\site-packages\numpy\core\include/numpy/__multiarray_api.h:1629:1: warning: '_import_array' defined but not used [-Wunused-function] + +In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ufuncobject.h:327:0, + + from pomegranate\bayesnet.c:348: + +C:\Anaconda\lib\site-packages\numpy\core\include/numpy/__ufunc_api.h:241:1: warning: '_import_umath' defined but not used [-Wunused-function] + +C:\Anaconda\Scripts\gcc.bat -DMS_WIN64 -mdll -O -Wall -IC:\Anaconda\lib\site-packages\numpy\core\include -IC:\Anaconda\include -IC:\Anaconda\PC -c pomegranate\fsm.c -o build\temp.win-amd64-2.7\Release\pomegranate\fsm.o + +In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarraytypes.h:1761:0, + + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarrayobject.h:17, + + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/arrayobject.h:4, + + from pomegranate\fsm.c:347: + +C:\Anaconda\lib\site-packages\numpy\core\include/numpy/npy_1_7_deprecated_api.h:12:9: note: #pragma message: C:\Anaconda\lib\site-packages\numpy\core\include/numpy/npy_1_7_deprecated_api.h(12) : Warning Msg: Using deprecated NumPy API, disable it by #defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + +pomegranate\fsm.c: In function '__Pyx_RaiseArgtupleInvalid': + +pomegranate\fsm.c:26885:18: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\fsm.c:26885:18: warning: format '%s' expects argument of type 'char *', but argument 5 has type 'Py_ssize_t' [-Wformat] + +pomegranate\fsm.c:26885:18: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\fsm.c:26885:18: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\fsm.c: In function '__Pyx_RaiseTooManyValuesError': + +pomegranate\fsm.c:26902:18: warning: unknown conversion type character 'z' in format [-Wformat] + 
+pomegranate\fsm.c:26902:18: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\fsm.c: In function '__Pyx_RaiseNeedMoreValuesError': + +pomegranate\fsm.c:26908:18: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\fsm.c:26908:18: warning: format '%s' expects argument of type 'char *', but argument 3 has type 'Py_ssize_t' [-Wformat] + +pomegranate\fsm.c:26908:18: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\fsm.c: In function '__Pyx_BufFmt_ProcessTypeChunk': + +pomegranate\fsm.c:27195:26: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\fsm.c:27195:26: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\fsm.c:27195:26: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\fsm.c:27247:20: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\fsm.c:27247:20: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\fsm.c:27247:20: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\fsm.c: In function '__pyx_buffmt_parse_array': + +pomegranate\fsm.c:27309:25: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\fsm.c:27309:25: warning: format '%d' expects argument of type 'int', but argument 3 has type 'size_t' [-Wformat] + +pomegranate\fsm.c:27309:25: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\fsm.c: In function '__Pyx_GetBufferAndValidate': + +pomegranate\fsm.c:27490:7: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\fsm.c:27490:7: warning: format '%s' expects argument of type 'char *', but argument 3 has type 'Py_ssize_t' [-Wformat] + +pomegranate\fsm.c:27490:7: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\fsm.c:27490:7: warning: too many arguments for format [-Wformat-extra-args] + 
+pomegranate\fsm.c: In function '__Pyx_ValidateAndInit_memviewslice': + +pomegranate\fsm.c:30229:22: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\fsm.c:30229:22: warning: format '%s' expects argument of type 'char *', but argument 3 has type 'Py_ssize_t' [-Wformat] + +pomegranate\fsm.c:30229:22: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\fsm.c:30229:22: warning: too many arguments for format [-Wformat-extra-args] + +In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarrayobject.h:26:0, + + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/arrayobject.h:4, + + from pomegranate\fsm.c:347: + +pomegranate\fsm.c: At top level: + +C:\Anaconda\lib\site-packages\numpy\core\include/numpy/__multiarray_api.h:1629:1: warning: '_import_array' defined but not used [-Wunused-function] + +In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ufuncobject.h:327:0, + + from pomegranate\fsm.c:348: + +C:\Anaconda\lib\site-packages\numpy\core\include/numpy/__ufunc_api.h:241:1: warning: '_import_umath' defined but not used [-Wunused-function] + +C:\Anaconda\Scripts\gcc.bat -DMS_WIN64 -mdll -O -Wall -IC:\Anaconda\lib\site-packages\numpy\core\include -IC:\Anaconda\include -IC:\Anaconda\PC -c pomegranate\hmm.c -o build\temp.win-amd64-2.7\Release\pomegranate\hmm.o + +In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarraytypes.h:1761:0, + + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarrayobject.h:17, + + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/arrayobject.h:4, + + from pomegranate\hmm.c:347: + +C:\Anaconda\lib\site-packages\numpy\core\include/numpy/npy_1_7_deprecated_api.h:12:9: note: #pragma message: C:\Anaconda\lib\site-packages\numpy\core\include/numpy/npy_1_7_deprecated_api.h(12) : Warning Msg: Using deprecated NumPy API, disable it by #defining NPY_NO_DEPRECATED_API 
NPY_1_7_API_VERSION + +pomegranate\hmm.c: In function '__Pyx_RaiseArgtupleInvalid': + +pomegranate\hmm.c:49566:18: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\hmm.c:49566:18: warning: format '%s' expects argument of type 'char *', but argument 5 has type 'Py_ssize_t' [-Wformat] + +pomegranate\hmm.c:49566:18: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\hmm.c:49566:18: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\hmm.c: In function '__Pyx_BufFmt_ProcessTypeChunk': + +pomegranate\hmm.c:49847:26: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\hmm.c:49847:26: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\hmm.c:49847:26: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\hmm.c:49899:20: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\hmm.c:49899:20: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\hmm.c:49899:20: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\hmm.c: In function '__pyx_buffmt_parse_array': + +pomegranate\hmm.c:49961:25: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\hmm.c:49961:25: warning: format '%d' expects argument of type 'int', but argument 3 has type 'size_t' [-Wformat] + +pomegranate\hmm.c:49961:25: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\hmm.c: In function '__Pyx_GetBufferAndValidate': + +pomegranate\hmm.c:50142:7: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\hmm.c:50142:7: warning: format '%s' expects argument of type 'char *', but argument 3 has type 'Py_ssize_t' [-Wformat] + +pomegranate\hmm.c:50142:7: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\hmm.c:50142:7: warning: too many arguments for format 
[-Wformat-extra-args] + +pomegranate\hmm.c: In function '__Pyx_RaiseTooManyValuesError': + +pomegranate\hmm.c:50416:18: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\hmm.c:50416:18: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\hmm.c: In function '__Pyx_RaiseNeedMoreValuesError': + +pomegranate\hmm.c:50422:18: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\hmm.c:50422:18: warning: format '%s' expects argument of type 'char *', but argument 3 has type 'Py_ssize_t' [-Wformat] + +pomegranate\hmm.c:50422:18: warning: too many arguments for format [-Wformat-extra-args] + +pomegranate\hmm.c: In function '__Pyx_ValidateAndInit_memviewslice': + +pomegranate\hmm.c:53734:22: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\hmm.c:53734:22: warning: format '%s' expects argument of type 'char *', but argument 3 has type 'Py_ssize_t' [-Wformat] + +pomegranate\hmm.c:53734:22: warning: unknown conversion type character 'z' in format [-Wformat] + +pomegranate\hmm.c:53734:22: warning: too many arguments for format [-Wformat-extra-args] + +In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ndarrayobject.h:26:0, + + from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/arrayobject.h:4, + + from pomegranate\hmm.c:347: + +pomegranate\hmm.c: At top level: + +C:\Anaconda\lib\site-packages\numpy\core\include/numpy/__multiarray_api.h:1629:1: warning: '_import_array' defined but not used [-Wunused-function] + +In file included from C:\Anaconda\lib\site-packages\numpy\core\include/numpy/ufuncobject.h:327:0, + + from pomegranate\hmm.c:348: + +C:\Anaconda\lib\site-packages\numpy\core\include/numpy/__ufunc_api.h:241:1: warning: '_import_umath' defined but not used [-Wunused-function] + +pomegranate\hmm.c: In function '__pyx_f_11pomegranate_11pomegranate_17HiddenMarkovModel__sample': + +pomegranate\hmm.c:10433:18: warning: 
'__pyx_v_k' may be used uninitialized in this function [-Wmaybe-uninitialized] + +writing build\temp.win-amd64-2.7\Release\pomegranate\pomegranate.def + +C:\Anaconda\Scripts\gcc.bat -DMS_WIN64 -shared -s build\temp.win-amd64-2.7\Release\pomegranate\base.o build\temp.win-amd64-2.7\Release\pomegranate\bayesnet.o build\temp.win-amd64-2.7\Release\pomegranate\fsm.o build\temp.win-amd64-2.7\Release\pomegranate\hmm.o build\temp.win-amd64-2.7\Release\pomegranate\pomegranate.def -LC:\Anaconda\libs -LC:\Anaconda\PCbuild\amd64 -lpython27 -lmsvcr90 -o build\lib.win-amd64-2.7\pomegranate\pomegranate.pyd + +build\temp.win-amd64-2.7\Release\pomegranate\bayesnet.o:bayesnet.c:(.text+0x29b84): multiple definition of `initpomegranate' + +build\temp.win-amd64-2.7\Release\pomegranate\base.o:base.c:(.text+0x1292e): first defined here + +build\temp.win-amd64-2.7\Release\pomegranate\bayesnet.o:bayesnet.c:(.bss+0x0): multiple definition of `__pyx_module_is_main_pomegranate__pomegranate' + +build\temp.win-amd64-2.7\Release\pomegranate\base.o:base.c:(.bss+0x0): first defined here + +build\temp.win-amd64-2.7\Release\pomegranate\fsm.o:fsm.c:(.text+0x519e5): multiple definition of `initpomegranate' + +build\temp.win-amd64-2.7\Release\pomegranate\base.o:base.c:(.text+0x1292e): first defined here + +build\temp.win-amd64-2.7\Release\pomegranate\fsm.o:fsm.c:(.bss+0x0): multiple definition of `__pyx_module_is_main_pomegranate__pomegranate' + +build\temp.win-amd64-2.7\Release\pomegranate\base.o:base.c:(.bss+0x0): first defined here + +build\temp.win-amd64-2.7\Release\pomegranate\hmm.o:hmm.c:(.text+0xd80c9): multiple definition of `initpomegranate' + +build\temp.win-amd64-2.7\Release\pomegranate\base.o:base.c:(.text+0x1292e): first defined here + +build\temp.win-amd64-2.7\Release\pomegranate\hmm.o:hmm.c:(.bss+0x0): multiple definition of `__pyx_module_is_main_pomegranate__pomegranate' + +build\temp.win-amd64-2.7\Release\pomegranate\base.o:base.c:(.bss+0x0): first defined here + +collect2.exe: error: 
ld returned 1 exit status + +warning: pomegranate\bayesnet.pyx:192:31: Index should be typed for more efficient access + +error: command 'gcc' failed with exit status 1 + +---------------------------------------- +Cleaning up... +Command C:\Anaconda\python.exe -c "import setuptools, tokenize;__file__='c:\\users\\jacob\\appdata\\local\\temp\\pip_build_Jacob\\pomegranate\\setup.py';exec(compile(getattr(tokenize, 'open', open)(__file__).read().replace('\r\n', '\n'), __file__, 'exec'))" install --record c:\users\jacob\appdata\local\temp\pip-6w7uwt-record\install-record.txt --single-version-externally-managed --compile failed with error code 1 in c:\users\jacob\appdata\local\temp\pip_build_Jacob\pomegranate +Storing debug log for failure in C:\Users\Jacob\pip\pip.log diff --git a/examples/monty_hall_bayes_net.py b/examples/monty_hall_bayes_net.py new file mode 100644 index 00000000..aae36cf8 --- /dev/null +++ b/examples/monty_hall_bayes_net.py @@ -0,0 +1,83 @@ +# Monty Hall Bayes Net Test +# Contact: Jacob Schreiber +# jmschr@cs.washington.edu + +''' +Lets test out the Bayesian Network framework to produce the Monty Hall problem, +but modified a little. The Monty Hall problem is basically a game show where a +guest chooses one of three doors to open, with an unknown one having a prize +behind it. Monty then opens another non-chosen door without a prize behind it, +and asks the guest if they would like to change their answer. Many people were +surprised to find that if the guest changed their answer, there was a 66% chance +of success as opposed to a 50% as might be expected if there were two doors. + +This can be modelled as a Bayesian network with three nodes-- guest, prize, and +Monty, each over the domain of door 'A', 'B', 'C'. Monty is dependent on both +guest and prize, in that it can't be either of them. Lets extend this a little +bit to say the guest has an untrustworthy friend whose answer he will not go with. 
+''' + +import math +from pomegranate import * + +# The friend's emissions are completely random +friend = DiscreteDistribution( { 'A': 1./3, 'B': 1./3, 'C': 1./3 } ) + +# The guest is conditioned on the friend, basically go against the friend +guest = ConditionalDiscreteDistribution( { + 'A' : DiscreteDistribution({ 'A' : 0.0, 'B' : 0.5, 'C' : 0.5 }), + 'B' : DiscreteDistribution({ 'A' : 0.5, 'B' : 0.0, 'C' : 0.5 }), + 'C' : DiscreteDistribution({ 'A' : 0.5, 'B' : 0.5, 'C' : 0.0 }) + }, [friend]) + +# The actual prize is independent of the other distributions +prize = DiscreteDistribution( { 'A': 1./3, 'B': 1./3, 'C': 1./3 } ) + +# Monty is dependent on both the guest and the prize. +monty = ConditionalDiscreteDistribution( { + 'A' : { 'A' : DiscreteDistribution({ 'A' : 0.0, 'B' : 0.5, 'C' : 0.5 }), + 'B' : DiscreteDistribution({ 'A' : 0.0, 'B' : 0.0, 'C' : 1.0 }), + 'C' : DiscreteDistribution({ 'A' : 0.0, 'B' : 1.0, 'C' : 0.0 }) }, + 'B' : { 'A' : DiscreteDistribution({ 'A' : 0.0, 'B' : 0.0, 'C' : 1.0 }), + 'B' : DiscreteDistribution({ 'A' : 0.5, 'B' : 0.0, 'C' : 0.5 }), + 'C' : DiscreteDistribution({ 'A' : 1.0, 'B' : 0.0, 'C' : 0.0 }) }, + 'C' : { 'A' : DiscreteDistribution({ 'A' : 0.0, 'B' : 1.0, 'C' : 0.0 }), + 'B' : DiscreteDistribution({ 'A' : 1.0, 'B' : 0.0, 'C' : 0.0 }), + 'C' : DiscreteDistribution({ 'A' : 0.5, 'B' : 0.5, 'C' : 0.0 }) } + }, [guest, prize] ) + +# Make the states +s0 = State( friend, name="friend" ) +s1 = State( guest, name="guest" ) +s2 = State( prize, name="prize" ) +s3 = State( monty, name="monty" ) + +# Make the bayes net, add the states, and the conditional dependencies. 
+network = BayesianNetwork( "test" ) +network.add_states( [ s0, s1, s2, s3 ] ) +network.add_transition( s0, s1 ) +network.add_transition( s1, s3 ) +network.add_transition( s2, s3 ) +network.bake() + +print "\t".join([ state.name for state in network.states ]) +print +print "Guest says 'A'" +observations = { 'guest' : 'A' } +beliefs = map( str, network.forward_backward( observations ) ) +print "\n".join( "{}\t{}".format( state.name, belief ) for state, belief in zip( network.states, beliefs ) ) +print +print "Guest says 'A', monty says 'B' (note that prize goes to 66% if you switch)" +observations = { 'guest' : 'A', 'monty' : 'B' } +beliefs = map( str, network.forward_backward( observations ) ) +print "\n".join( "{}\t{}".format( state.name, belief ) for state, belief in zip( network.states, beliefs ) ) +print +print "Friend says 'A', monty said 'B'" +observations = { 'friend' : 'A', 'monty' : 'B' } +beliefs = map( str, network.forward_backward( observations ) ) +print "\n".join( "{}\t{}".format( state.name, belief ) for state, belief in zip( network.states, beliefs ) ) +print +print "50% chance friend says A, 50% chance friend says B" +observations = { 'friend' : DiscreteDistribution({ 'A' : 0.5, 'B' : 0.5, 'C' : 0.0 }) } +beliefs = map( str, network.forward_backward( observations ) ) +print "\n".join( "{}\t{}".format( state.name, belief ) for state, belief in zip( network.states, beliefs ) ) diff --git a/examples/rainy_sunny_hmm.py b/examples/rainy_sunny_hmm.py new file mode 100644 index 00000000..10a536a8 --- /dev/null +++ b/examples/rainy_sunny_hmm.py @@ -0,0 +1,54 @@ +# rainy_sunny_hmm.py +# Contact: Jacob Schreiber +# jmschreiber91@gmail.com + +""" +Example rainy-sunny HMM using pomegranate. Example drawn from the wikipedia HMM +article: http://en.wikipedia.org/wiki/Hidden_Markov_model describing what +Bob likes to do on rainy or sunny days. 
+""" + +from pomegranate import * +from pomegranate import HiddenMarkovModel as Model +import random +import math + +random.seed(0) + +model = Model( name="Rainy-Sunny" ) + +# Emission probabilities +rainy = State( DiscreteDistribution({ 'walk': 0.1, 'shop': 0.4, 'clean': 0.5 }) ) +sunny = State( DiscreteDistribution({ 'walk': 0.6, 'shop': 0.3, 'clean': 0.1 }) ) + +model.add_transition( model.start, rainy, 0.6 ) +model.add_transition( model.start, sunny, 0.4 ) + +# Transition matrix, with 0.05 subtracted from each probability to add to +# the probability of exiting the hmm +model.add_transition( rainy, rainy, 0.65 ) +model.add_transition( rainy, sunny, 0.25 ) +model.add_transition( sunny, rainy, 0.35 ) +model.add_transition( sunny, sunny, 0.55 ) + +# Add transitions to the end of the model +model.add_transition( rainy, model.end, 0.1 ) +model.add_transition( sunny, model.end, 0.1 ) + +# Finalize the model structure +model.bake( verbose=True ) + +# Lets sample from this model. +print model.sample() + +# Lets call Bob every hour and see what he's doing! +# (aka build up a sequence of observations) +sequence = [ 'walk', 'shop', 'clean', 'clean', 'clean', 'walk', 'clean' ] + +# What is the probability of seeing this sequence? 
+print "Probability of Sequence: ", \ + math.e**model.forward( sequence )[ len(sequence), model.end_index ] +print "Probability of Cleaning at Time Step 3 Given This Sequence: ", \ + math.e**model.forward_backward( sequence )[1][ 2, model.states.index( rainy ) ] +print "Probability of the Sequence Given It's Sunny at Time Step 4: ", \ + math.e**model.backward( sequence )[ 3, model.states.index( sunny ) ] diff --git a/examples/tied_state_hmm.py b/examples/tied_state_hmm.py new file mode 100644 index 00000000..6d223eea --- /dev/null +++ b/examples/tied_state_hmm.py @@ -0,0 +1,124 @@ +# tied_state_hmm.py +# Contact: Jacob Schreiber +# jmschreiber91@gmail.com + +""" +An example of using tied states to represent the same distribution across +multiple states. This example is a toy example derived from biology, where we +will look at DNA sequences. The fake structure we will pretend exists is: + +start -> background -> CG island -> background -> poly-T region + +DNA is comprised of four nucleotides, A, C, G, and T. Lets say that in the +background sequence, all of these occur at the same frequency. In the CG +island, the nucleotides C and G occur more frequently. In the poly T region, +T occurs most frequently. + +We need the graph structure, because we fake know that the sequence must return +to the background distribution between the CG island and the poly-T region. 
+However, we also fake know that both background distributions need to be the same +""" + +from pomegranate import * +from pomegranate import HiddenMarkovModel as Model +import random +import numpy +random.seed(0) + +# Lets start off with an example without tied states and see what happens +print "Without Tied States" +print + +model = Model( "No Tied States" ) + +# Define the four states +background_one = State( DiscreteDistribution({'A': 0.25, 'C':0.25, 'G': 0.25, 'T':0.25 }), name="B1" ) +CG_island = State( DiscreteDistribution({'A': 0.1, 'C':0.4, 'G': 0.4, 'T':0.1 }), name="CG" ) +background_two = State( DiscreteDistribution({'A': 0.25, 'C':0.25, 'G': 0.25, 'T':0.25 }), name="B2" ) +poly_T = State( DiscreteDistribution({'A': 0.1, 'C':0.1, 'G': 0.1, 'T':0.7 }), name="PT" ) + +# Add all the transitions +model.add_transition( model.start, background_one, 1. ) +model.add_transition( background_one, background_one, 0.9 ) +model.add_transition( background_one, CG_island, 0.1 ) +model.add_transition( CG_island, CG_island, 0.8 ) +model.add_transition( CG_island, background_two, 0.2 ) +model.add_transition( background_two, background_two, 0.8 ) +model.add_transition( background_two, poly_T, 0.2 ) +model.add_transition( poly_T, poly_T, 0.7 ) +model.add_transition( poly_T, model.end, 0.3 ) +model.bake( verbose=True ) + +# Define the sequences. Training must be done on a list of lists, not on a string, +# in order to allow strings of any length. 
+sequences = [ numpy.array(list("TAGCACATCGCAGCGCATCACGCGCGCTAGCATATAAGCACGATCAGCACGACTGTTTTT")), + numpy.array(list("TAGAATCGCTACATAGACGCGCGCTCGCCGCGCTCGATAAGCTACGAACACGATTTTTTA")), + numpy.array(list("GATAGCTACGACTACGCGACTCACGCGCGCGCTCCGCATCAGACACGAATATAGATAAGATATTTTTT")) ] + + +# Print the distributions before training +print +print "\n".join( "{}: {}".format( state.name, state.distribution ) + for state in model.states if not state.is_silent() ) + +# Train +model.train( sequences, stop_threshold=0.01 ) + +# Print the distributions after training +print +print "\n".join( "{}: {}".format( state.name, state.distribution ) + for state in model.states if not state.is_silent() ) + +print "-"*80 + +print "With Tied States" +print + +model = Model( "Tied States" ) + +# Define the background distribution +background = DiscreteDistribution({'A': 0.25, 'C':0.25, 'G': 0.25, 'T':0.25 }) + +# Define the four states. Pass the background distribution to the the two +# background states. This is the only change you need to make. +background_one = State( background, name="B1" ) +CG_island = State( DiscreteDistribution({'A': 0.1, + 'C':0.4, 'G': 0.4, 'T':0.1 }), name="CG" ) +background_two = State( background, name="B2" ) +poly_T = State( DiscreteDistribution({'A': 0.1, + 'C':0.1, 'G': 0.1, 'T':0.7 }), name="PT" ) + +# Add all the transitions +model.add_transition( model.start, background_one, 1. ) +model.add_transition( background_one, background_one, 0.9 ) +model.add_transition( background_one, CG_island, 0.1 ) +model.add_transition( CG_island, CG_island, 0.8 ) +model.add_transition( CG_island, background_two, 0.2 ) +model.add_transition( background_two, background_two, 0.8 ) +model.add_transition( background_two, poly_T, 0.2 ) +model.add_transition( poly_T, poly_T, 0.7 ) +model.add_transition( poly_T, model.end, 0.3 ) +model.bake( verbose=True ) + +# Define the sequences. Training must be done on a list of lists, not on a string, +# in order to allow strings of any length. 
+sequences = [ numpy.array(list("TAGCACATCGCAGCGCATCACGCGCGCTAGCATATAAGCACGATCAGCACGACTGTTTTT")), + numpy.array(list("TAGAATCGCTACATAGACGCGCGCTCGCCGCGCTCGATAAGCTACGAACACGATTTTTTA")), + numpy.array(list("GATAGCTACGACTACGCGACTCACGCGCGCGCTCCGCATCAGACACGAATATAGATAAGATATTTTTT")) ] + + +# Print the distributions before training +print +print "\n".join( "{}: {}".format( state.name, state.distribution ) + for state in model.states if not state.is_silent() ) + +# Train +model.train( sequences, stop_threshold=0.01 ) + +# Print the distributions after training +print +print "\n".join( "{}: {}".format( state.name, state.distribution ) + for state in model.states if not state.is_silent() ) +print +print "Notice that states B1 and B2 are the same after training with tied states, \ + not so without tied states" diff --git a/pomegranate/__init__.py b/pomegranate/__init__.py index d140b3a2..f3f4bb4a 100644 --- a/pomegranate/__init__.py +++ b/pomegranate/__init__.py @@ -1,6 +1,6 @@ -# __init__.py: Yet Another Hidden Markov Model library +# __init__.py: pomegranate # Contact: Jacob Schreiber ( jmschreiber91@gmail.com ) -# Adam Novak ( anovak1@ucsc.edu ) + """ For detailed documentation and examples, see the README. 
@@ -42,4 +42,4 @@ from distributions import * from base import * -__version__ = '1.0.0' \ No newline at end of file +__version__ = '0.0.2' \ No newline at end of file diff --git a/pomegranate/base.pxd b/pomegranate/base.pxd index 424fced6..ca7835e7 100644 --- a/pomegranate/base.pxd +++ b/pomegranate/base.pxd @@ -19,4 +19,4 @@ cdef class Model( object ): cdef double [:] out_transition_log_probabilities cdef class StructuredModel( Model ): - pass \ No newline at end of file + pass diff --git a/pomegranate/bayesnet.pyx b/pomegranate/bayesnet.pyx index 6a255505..148572b6 100644 --- a/pomegranate/bayesnet.pyx +++ b/pomegranate/bayesnet.pyx @@ -79,28 +79,18 @@ cdef class BayesianNetwork( Model ): Represents a Bayesian Network """ - def add_edges( self, a, b, weights ): + def add_transition( self, a, b ): + """ + Add a transition from state a to state b which indicates that B is + dependent on A in ways specified by the distribution. """ - Add many transitions at the same time, in one of two forms. - - (1) If both a and b are lists, then create transitions from the i-th - element of a to the i-th element of b with a probability equal to the - i-th element of probabilities. - - Example: - model.add_transitions([model.start, s1], [s1, model.end], [1., 1.]) - - (2) If either a or b are a state, and the other is a list, create a - transition from all states in the list to the single state object with - probabilities and pseudocounts specified appropriately. - Example: - model.add_transitions([model.start, s1, s2, s3], s4, [0.2, 0.4, 0.3, 0.9]) - model.add_transitions(model.start, [s1, s2, s3], [0.6, 0.2, 0.05]) + # Add the transition + self.graph.add_edge(a, b ) - If a single group is given, it's assumed all edges should belong to that - group. Otherwise, either groups can be a list of group identities, or - simply None if no group is meant. + def add_transitions( self, a, b ): + """ + Add multiple conditional dependencies at the same time. 
""" n = len(a) if isinstance( a, list ) else len(b) @@ -108,20 +98,20 @@ cdef class BayesianNetwork( Model ): # Allow addition of many transitions from many states if isinstance( a, list ) and isinstance( b, list ): # Set up an iterator across all edges - for start, end, weight in izip( a, b, weights ): - self.add_transition( start, end, weight ) + for start, end in izip( a, b ): + self.add_transition( start, end ) # Allow for multiple transitions to a specific state elif isinstance( a, list ) and isinstance( b, State ): # Set up an iterator across all edges to b - for start, weight in izip( a, weights ): - self.add_transition( start, b, weight ) + for start in a: + self.add_transition( start, b ) # Allow for multiple transitions from a specific state elif isinstance( a, State ) and isinstance( b, list ): # Set up an iterator across all edges from a - for end, weight in izip( b, weights ): - self.add_transition( a, end, weight ) + for end in b: + self.add_transition( a, end ) def log_probability( self, data ): ''' @@ -151,24 +141,31 @@ cdef class BayesianNetwork( Model ): def forward( self, data={} ): ''' - Calculate the posterior probabilities of each hidden variable, which - are the variables not observed in the data. + Propogate messages forward through the network from observed data to + distributions which depend on that data. This is not the full belief + propogation algorithm. 
''' + # Go from state names:data to distribution object:data names = { state.name: state.distribution for state in self.states } data = { names[state]: value for state, value in data.items() } + # List of factors factors = [ data[ s.distribution ] if s.distribution in data else None for s in self.states ] + # Unpack the edges in_edges = numpy.array( self.in_edge_count ) out_edges = numpy.array( self.out_edge_count ) + # Figure out the roots of the graph, meaning they're independent of the + # remainder of the graph and have been visited roots = numpy.where( in_edges[1:] - in_edges[:-1] == 0 )[0] visited = numpy.zeros( len( self.states ) ) for i, state in enumerate( self.states ): if state.distribution in data.keys(): visited[i] = 1 + # For each of your roots, unpack observed data or use the prior for root in roots: visited[ root ] = 1 if factors[ root ] is not None: @@ -182,6 +179,7 @@ cdef class BayesianNetwork( Model ): else: factors[ root ] = d + # Go through all of the states and while True: for i, state in enumerate( self.states ): if visited[ i ] == 1: @@ -213,19 +211,28 @@ cdef class BayesianNetwork( Model ): def backward( self, data={} ): ''' + Propogate messages backwards through the network from observed data to + distributions which that data depends on. This is not the full belief + propogation algorithm. 
''' + # Go from state names:data to distribution object:data names = { state.name: state.distribution for state in self.states } data = { names[state]: value for state, value in data.items() } + # List of factors factors = [ data[ s.distribution ] if s.distribution in data else s.distribution.marginal() for s in self.states ] new_factors = [ i for i in factors ] + # Unpack the edges in_edges = numpy.array( self.in_edge_count ) out_edges = numpy.array( self.out_edge_count ) + # Record the message passed along each edge messages = [ None for i in in_edges ] + # Figure out the leaves of the graph, which are independent of the other + # nodes using the backwards algorithm, and say we've visited them. leaves = numpy.where( out_edges[1:] - out_edges[:-1] == 0 )[0] visited = numpy.zeros( len( self.states ) ) visited[leaves] = 1 @@ -233,14 +240,21 @@ cdef class BayesianNetwork( Model ): if s.distribution in data and not isinstance( data[ s.distribution ], Distribution ): visited[i] = 1 + # Go through the nodes we haven't yet visited and update their beliefs + # iteratively if we've seen all the data which depends on it. while True: for i, state in enumerate( self.states ): + # If we've already visited the state, then don't visit + # it again. if visited[i] == 1: continue + # Unpack the state and the distribution state = self.states[i] d = state.distribution + # Make sure we've seen all the distributions which depend on + # this one, otherwise break. 
for k in xrange( out_edges[i], out_edges[i+1] ): ki = self.out_transitions[k] if visited[ki] == 0: @@ -249,18 +263,25 @@ cdef class BayesianNetwork( Model ): for k in xrange( out_edges[i], out_edges[i+1] ): ki = self.out_transitions[k] + # Update the parent information parents = {} for l in xrange( in_edges[ki], in_edges[ki+1] ): li = self.in_transitions[l] parents[ self.states[li].distribution ] = factors[li] + # Get the messages for each of those states messages[k] = self.states[ki].distribution.marginal( parents, wrt=d, value=new_factors[ki] ) else: + # Find the local messages which influence these local_messages = [ factors[i] ] + [ messages[k] for k in xrange( out_edges[i], out_edges[i+1] ) ] + + # Merge marginals of each of these, and the prior information new_factors[i] = merge_marginals( local_messages ) + # Mark that we've visited this state. visited[i] = 1 + # If we've visited all states, we're done if visited.sum() == visited.shape[0]: break @@ -269,14 +290,11 @@ cdef class BayesianNetwork( Model ): def forward_backward( self, data={} ): ''' - ... + Propogate messages forward through the network to update beliefs in + each state, then backwards from those beliefs to the remainder of + the network. This is the sum-product belief propogation algorithm. ''' - print "FORWARD LOGS" factors = self.forward( data ) data = { self.states[i].name: factors[i] for i in xrange( len(factors) ) } - - print "\n".join( map( str, factors ) ) - print - print "BACKWARD LOGS" return self.backward( data ) \ No newline at end of file diff --git a/pomegranate/distributions.pyx b/pomegranate/distributions.pyx index b334ae1e..ff9110bc 100644 --- a/pomegranate/distributions.pyx +++ b/pomegranate/distributions.pyx @@ -227,10 +227,14 @@ cdef class UniformDistribution( Distribution ): if self.frozen == True: return - if weights is not None: - # Throw out items with weight 0 - items = [item for (item, weight) in izip(items, weights) - if weight > 0] + # Calculate weights. 
If none are provided, give uniform weights + if weights is None: + weights = numpy.ones_like( items ) + else: + weights = numpy.asarray( weights ) + + if weights.sum() == 0: + return if len(items) == 0: # No sample, so just ignore it and keep our old parameters. @@ -251,10 +255,13 @@ cdef class UniformDistribution( Distribution ): summary statistic to be used in training later. """ - if weights is not None: - # Throw out items with weight 0 - items = [ item for item, weight in izip( items, weights ) - if weight > 0 ] + if weights is None: + weights = numpy.ones_like( items ) + else: + weights = numpy.asarray( weights ) + + if weights.sum() == 0: + return if len( items ) == 0: # No sample, so just ignore it and keep our own parameters. @@ -417,6 +424,9 @@ cdef class NormalDistribution( Distribution ): else: weights = numpy.asarray( weights ) + if weights.sum() == 0: + return + # Save the mean and variance, the summary statistics for a normal # distribution. mean = numpy.average( items, weights=weights ) @@ -574,6 +584,9 @@ cdef class LogNormalDistribution( Distribution ): else: weights = numpy.asarray( weights ) + if weights.sum() == 0: + return + # Calculate the mean and variance, which are the summary statistics # for a log-normal distribution. mean = numpy.average( numpy.log(items), weights=weights ) @@ -738,6 +751,9 @@ cdef class ExponentialDistribution( Distribution ): else: weights = numpy.asarray( weights ) + if weights.sum() == 0: + return + # Calculate the summary statistic, which in this case is the mean. mean = numpy.average( items, weights=weights ) self.summaries.append( [ mean, weights.sum() ] ) @@ -1131,8 +1147,6 @@ cdef class DiscreteDistribution(Distribution): self.name = "DiscreteDistribution" self.frozen = frozen - - def log_probability( self, symbol ): """ What's the probability of the given symbol under this distribution? 
@@ -1209,6 +1223,9 @@ cdef class DiscreteDistribution(Distribution): else: weights = numpy.asarray( weights ) + if weights.sum() == 0: + return + characters = self.summaries[0] for character, weight in izip( items, weights ): try: @@ -1650,7 +1667,107 @@ cdef class MixtureDistribution( Distribution ): of distribution. """ - raise NotImplementedError + """ + Perform EM to estimate the parameters of each distribution + which is a part of this mixture. + """ + + if weights is None: + weights = numpy.ones( len(items) ) + else: + weights = numpy.asarray( weights ) + + if weights.sum() == 0: + return + + distributions, w = self.parameters + n, k = len(items), len(distributions) + + # The responsibility matrix + r = numpy.zeros( (n, k) ) + + # Calculate the log probabilities of each p + for i, distribution in enumerate( distributions ): + for j, item in enumerate( items ): + r[j, i] = distribution.log_probability( item ) + + r = numpy.exp( r ) + + # Turn these log probabilities into responsibilities by + # normalizing on a row-by-row manner. + for i in xrange( n ): + r[i] = r[i] / r[i].sum() + + # Weight the responsibilities by the given weights + for i in xrange( k ): + r[:,i] = r[:,i]*weights + + # Update the emissions of each distribution + for i, distribution in enumerate( distributions ): + distribution.from_sample( items, weights=r[:,i] ) + + # Update the weight of each distribution + self.parameters[1] = r.sum( axis=0 ) / r.sum() + + def summarize( self, items, weights=None ): + """ + Performs the summary step of the EM algorithm to estimate + parameters of each distribution which is a part of this mixture. 
+ """ + + if weights is None: + weights = numpy.ones( len(items) ) + else: + weights = numpy.asarray( weights ) + + if weights.sum() == 0: + return + + distributions, w = self.parameters + n, k = len(items), len(distributions) + + # The responsibility matrix + r = numpy.zeros( (n, k) ) + + # Calculate the log probabilities of each p + for i, distribution in enumerate( distributions ): + for j, item in enumerate( items ): + r[j, i] = distribution.log_probability( item ) + + r = numpy.exp( r ) + + # Turn these log probabilities into responsibilities by + # normalizing on a row-by-row manner. + for i in xrange( n ): + r[i] = r[i] / r[i].sum() + + # Weight the responsibilities by the given weights + for i in xrange( k ): + r[:,i] = r[:,i]*weights + + # Save summary statistics on the emission distributions + for i, distribution in enumerate( distributions ): + distribution.summarize( items, weights=r[:,i]*weights ) + + # Save summary statistics for weight updates + self.summaries.append( r.sum( axis=0 ) / r.sum() ) + + def from_summaries( self, inertia=0.0 ): + """ + Performs the actual update step for the EM algorithm. + """ + + # If this distribution is frozen, don't do anything. + if self.frozen == True: + return + + # Update the emission distributions + for d in self.parameters[0]: + d.from_summaries( inertia=inertia ) + + # Update the weights + weights = numpy.array( self.summaries ) + self.parameters[1] = weights.sum( axis=0 ) / weights.sum() cdef class MultivariateDistribution( Distribution ): """ @@ -1823,6 +1940,8 @@ cdef class ConditionalDiscreteDistribution( ConditionalDistribution ): def marginal( self, parent_values={}, wrt=None, value=None ): ''' Sum over all parent distributions to return the marginal distribution. + If given a distribution to marginalize with respect to (wrt), then + return the marginal wrt that distribution. 
''' d, pd, keys = self.parameters diff --git a/setup.py b/setup.py index 64e0132f..d3079dff 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ else: use_cython = True -cmdclass = { } +cmdclass = {} if use_cython: ext_modules = [ @@ -33,7 +33,7 @@ setup( name='pomegranate', - version='0.0.1', + version='0.0.2', author='Jacob Schreiber', author_email='jmschreiber91@gmail.com', packages=['pomegranate'], diff --git a/test.py b/test.py deleted file mode 100644 index f638bfac..00000000 --- a/test.py +++ /dev/null @@ -1,111 +0,0 @@ -import math -from pomegranate import * - -####################### -# MODIFIED MONTY HALL # -####################### -''' -friend = DiscreteDistribution( { 'A': 1./3, 'B': 1./3, 'C': 1./3 } ) - -guest = ConditionalDiscreteDistribution( { - 'A' : DiscreteDistribution({ 'A' : 0.0, 'B' : 0.5, 'C' : 0.5 }), - 'B' : DiscreteDistribution({ 'A' : 0.5, 'B' : 0.0, 'C' : 0.5 }), - 'C' : DiscreteDistribution({ 'A' : 0.5, 'B' : 0.5, 'C' : 0.0 }) - }, [friend]) - -prize = DiscreteDistribution( { 'A': 1./3, 'B': 1./3, 'C': 1./3 } ) - -# Encoding is CHOSEN : ACTUAL : MONTY -monty = ConditionalDiscreteDistribution( { - 'A' : { 'A' : DiscreteDistribution({ 'A' : 0.0, 'B' : 0.5, 'C' : 0.5 }), - 'B' : DiscreteDistribution({ 'A' : 0.0, 'B' : 0.0, 'C' : 1.0 }), - 'C' : DiscreteDistribution({ 'A' : 0.0, 'B' : 1.0, 'C' : 0.0 }) }, - 'B' : { 'A' : DiscreteDistribution({ 'A' : 0.0, 'B' : 0.0, 'C' : 1.0 }), - 'B' : DiscreteDistribution({ 'A' : 0.5, 'B' : 0.0, 'C' : 0.5 }), - 'C' : DiscreteDistribution({ 'A' : 1.0, 'B' : 0.0, 'C' : 0.0 }) }, - 'C' : { 'A' : DiscreteDistribution({ 'A' : 0.0, 'B' : 1.0, 'C' : 0.0 }), - 'B' : DiscreteDistribution({ 'A' : 1.0, 'B' : 0.0, 'C' : 0.0 }), - 'C' : DiscreteDistribution({ 'A' : 0.5, 'B' : 0.5, 'C' : 0.0 }) } - }, [guest, prize] ) - -s0 = State( friend, name="friend" ) -s1 = State( guest, name="guest" ) -s2 = State( prize, name="prize" ) -s3 = State( monty, name="monty" ) - -network = BayesianNetwork( "test" ) -network.add_states( 
[ s0, s1, s2, s3 ] ) -network.add_transition( s0, s1, 1.0 ) -network.add_transition( s1, s3, 1.0 ) -network.add_transition( s2, s3, 1.0 ) -network.bake() - -print "\t".join([ state.name for state in network.states ]) -print "\n".join( map( str, network.forward_backward( { 'friend' : 'A', 'monty' : 'B' } ) ) ) -''' -################ -# ASIA EXAMPLE # -################ - -asia = DiscreteDistribution({ 'True' : 0.5, 'False' : 0.5 }) -tuberculosis = ConditionalDiscreteDistribution({ - 'True' : DiscreteDistribution({ 'True' : 0.2, 'False' : 0.80 }), - 'False' : DiscreteDistribution({ 'True' : 0.01, 'False' : 0.99 }) - }, [asia]) - -smoking = DiscreteDistribution({ 'True' : 0.5, 'False' : 0.5 }) -lung = ConditionalDiscreteDistribution({ - 'True' : DiscreteDistribution({ 'True' : 0.75, 'False' : 0.25 }), - 'False' : DiscreteDistribution({ 'True' : 0.02, 'False' : 0.98 }) - }, [smoking] ) -bronchitis = ConditionalDiscreteDistribution({ - 'True' : DiscreteDistribution({ 'True' : 0.92, 'False' : 0.08 }), - 'False' : DiscreteDistribution({ 'True' : 0.03, 'False' : 0.97}) - }, [smoking] ) - -tuberculosis_or_cancer = ConditionalDiscreteDistribution({ - 'True' : { 'True' : DiscreteDistribution({ 'True' : 1.0, 'False' : 0.0 }), - 'False' : DiscreteDistribution({ 'True' : 1.0, 'False' : 0.0 }), - }, - 'False' : { 'True' : DiscreteDistribution({ 'True' : 1.0, 'False' : 0.0 }), - 'False' : DiscreteDistribution({ 'True' : 0.0, 'False' : 1.0 }) - } - }, [tuberculosis, lung] ) - -xray = ConditionalDiscreteDistribution({ - 'True' : DiscreteDistribution({ 'True' : .885, 'False' : .115 }), - 'False' : DiscreteDistribution({ 'True' : 0.04, 'False' : 0.96 }) - }, [tuberculosis_or_cancer] ) - -dyspnea = ConditionalDiscreteDistribution({ - 'True' : { 'True' : DiscreteDistribution({ 'True' : 0.96, 'False' : 0.04 }), - 'False' : DiscreteDistribution({ 'True' : 0.89, 'False' : 0.11 }) - }, - 'False' : { 'True' : DiscreteDistribution({ 'True' : 0.82, 'False' : 0.18 }), - 'False' : 
DiscreteDistribution({ 'True' : 0.4, 'False' : 0.6 }) - } - }, [tuberculosis_or_cancer, bronchitis]) - -s0 = State( asia, name="asia" ) -s1 = State( tuberculosis, name="tuberculosis" ) -s2 = State( smoking, name="smoker" ) -s3 = State( lung, name="cancer" ) -s4 = State( bronchitis, name="bronchitis" ) -s5 = State( tuberculosis_or_cancer, name="TvC" ) -s6 = State( xray, name="xray" ) -s7 = State( dyspnea, name='dyspnea' ) - -network = BayesianNetwork( "asia" ) -network.add_states([ s0, s1, s2, s3, s4, s5, s6, s7 ]) -network.add_transition( s0, s1, 1.0 ) -network.add_transition( s1, s5, 1.0 ) -network.add_transition( s2, s3, 1.0 ) -network.add_transition( s2, s4, 1.0 ) -network.add_transition( s3, s5, 1.0 ) -network.add_transition( s5, s6, 1.0 ) -network.add_transition( s5, s7, 1.0 ) -network.add_transition( s4, s7, 1.0 ) -network.bake() - -print "\t".join([ state.name for state in network.states ]) -print "\n".join( map( str, network.forward_backward({ 'tuberculosis' : 'True', 'smoker' : 'False', 'bronchitis' : DiscreteDistribution({ 'True' : 0.8, 'False' : 0.2 }) }) ) )