import numpy as np
import matplotlib.pyplot as plt

# Synthetic data set: every sample is a 4x4 image (flattened to D = 16 pixels)
# built by summing a random subset of eight weighted binary features and
# adding Gaussian noise.  The resulting data matrix is stored in Y, shape (N, D).

N = 400  # number of data points - you can increase this if you want to
#          learn better features (but it will take longer).
D = 16  # dimensionality of the data

# Fixed seed so the generated data is identical on every run.
np.random.seed(0)

# Define the basic shapes of the features (each is a flattened 4x4 binary mask)
m1 = [0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0]
m2 = [0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0]
m3 = [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
m4 = [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1]
m5 = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0]
m6 = [1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1]
m7 = [0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0]
m8 = [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1]

nfeat = 8  # number of features

# Weight of each feature, drawn uniformly between 0.5 and 1; shape (nfeat, 1).
rr = 0.5 + np.random.rand(nfeat, 1) * 0.5

# Scale every feature mask by its weight.  Broadcasting (nfeat, 1) against
# (nfeat, D) multiplies row i of the mask matrix by rr[i], giving (nfeat, D).
mut = rr * np.array([m1, m2, m3, m4, m5, m6, m7, m8])

# Boolean activation matrix, shape (N, nfeat): each feature occurs in a sample
# with probability 0.3, independently of the others.
s = np.random.rand(N, nfeat) < 0.3

# Generate Data - The Data is stored in Y
# Sum of the active weighted features plus Gaussian noise with std 0.1.
Y = np.dot(s, mut) + np.random.randn(N, D) * 0.1

# NOTE(review): nrows is not used anywhere in this section; it is kept in case
# code further down the file reads it - confirm and remove if it is dead.
nrows = 13

# plt.gray() returns None and works only by switching the default colormap,
# so it is called once here (keeping that global effect) instead of being
# passed as the `cmap` argument on every iteration.
plt.gray()

# Show the first 16 samples as 4x4 grayscale images on a 4x4 grid of axes.
for k in range(16):
    plt.subplot(4, 4, k + 1)
    plt.imshow(np.reshape(Y[k], (4, 4)), cmap='gray', interpolation='none')
    plt.axis('off')

plt.show()