Getting an error with Openpyxl with Kivy - kivy

I'm trying to use some my python code I've written using IPython on Kivy, but I'm getting an error that says it cannot import name BUILTIN_FORMATS, which is called from the styleable.py within openpyxl.
BTW I used:
import openpyxl as xl
It works perfectly fine when I run the code within IPython.
Does anyone know how I can fix this.
EDIT: I've already tried reinstalling openpyxl with pip.
EDIT2: I'm on windows 7, and here's my code:
#!/usr/bin/kivy
import kivy
import random
import matplotlib.pyplot as plt
import pandas as pd
import pylab as pl
import requests
import openpyxl as xl
from operator import itemgetter
from collections import Counter
from lxml import html
#function to load the table form the excel file corresponding to the passed sheet name
def loadTable(sheetName):
lotteryData = pd.ExcelFile("Lottery databases.xlsx") #grabs and loads the file into memory
df = lotteryData.parse(sheetName) #loads the data table form the corresponding sheetName into the df data frame
return df
#function to display the table
def showTable(table):
#get the number of rows the table has
no_of_rows = len(table.index)
#display the table
return table.head(no_of_rows)
#function to display pie charts of a specific column within the database
#table is the database that the function will be working with
#and column is a numberical vaule of which column to get the data from
def printPieChart(table, column):
if column == 6:
columnList = table.iloc[:, -1:].values.T.ravel()
else:
columnList = table.iloc[:, (column - 7): (column - 6)].values.T.ravel()
countedList = Counter(columnList)
#set up the size of the pie chart
fig = plt.figure(figsize=[10, 10])
ax = fig.add_subplot(111)
cmap = plt.cm.prism
#input variables for pie function
slices = [float(v) for v in countedList.values()]
colors = cmap(np.linspace(0., 1., len(slices)))
labels = [float(k) for k in countedList]
columnHeaders = list(table.columns.values)
#the pie chart
pie_wedge_collection = ax.pie(slices, colors = colors, labels = labels, labeldistance = 1.05, autopct = '%1.1f%%')
#get rid of the black outlines between the wedges and around the pie
for pie_wedge in pie_wedge_collection[0]:
pie_wedge.set_edgecolor('white')
ax.set_title(columnHeaders[column + 1])
#can't display a Legends as there's too many for plt.legends() too handle
#return pyplot.pie([float(v) for v in countedList.values()], labels = [float(k) for k in countedList])
def updateDatabase():
wb = xl.load_workbook("Lottery databases.xlsx") #load the workbook into memory
#list of the sheet names within the workbook
sheetnames = ["SuperLotto", "MegaMillions", "Powerball"]
days = ["Tue. ", "Wed. ", "Fri. ", "Sat. "] #days the draws on done on
#list of the webpages to use grab the new draws
webPages = ['http://www.calottery.com/play/draw-games/superlotto-plus/winning-numbers', 'http://www.calottery.com/play/draw-games/mega-millions/winning-numbers', 'http://www.calottery.com/play/draw-games/powerball/winning-numbers']
x = 3
while x != 0:
ws = wb.get_sheet_by_name(sheetnames[x-1]) # which sheet to update
rowIndex = ws.get_highest_row() # gets the highest row index in the sheet
lastCellValue = ws.cell(row = rowIndex - 1, column = 0).value #gets the last value in the first column, draw number
page = requests.get(webPages[x-1]) #grabs the webpage needed
tree = html.fromstring(page.text) #puts the webpage into a tree structure to make it easy to traverse
#get the newest draw and date from the webpage for comparasion purposes
draw_and_date = tree.xpath('//*[#id="objBody_content_0_pagecontent_0_objPastWinningNumbers_rptPast_ctl01_lblDrawDateNumber"]/text()')
#if the table is up to date, it will move on to the next table else it will update it
y = int(draw_and_date[0][-4:]) - int(lastCellValue) # checks to see how many draws are missing from the table
if y == 0:
print("The table for " + sheetnames[x-1] + " is up to date.")
x -= 1 #decrement x by 1 to move on to the next table
else:
#while loop to check if the table needs to be updated or not, if yes it will update it
while y != 0:
#grabs the draw and date of the missing draws from the table
draw_and_date = tree.xpath('//*[#id="objBody_content_0_pagecontent_0_objPastWinningNumbers_rptPast_ctl0' + str(y) + '_lblDrawDateNumber"]/text()')
numbers = tree.xpath(".//*[#id='content']/div[3]/table/tr[" + str(y) + "]/td[2]/span/text()") #numbers
numbers = [int(x) for x in numbers] # converts the text to integers
numbers.sort() #sort the number from smallest to largest
mega = tree.xpath(".//*[#id='content']/div[3]/table/tr[" + str(y) + "]/td[3]/text()") #mega number
mega = int(mega[0]) # converts the text to integers
#write to the file
if sheetnames[x-1] == "MegaMillions":
d = 0
else:
d = 1
if int(draw_and_date[0][-4:]) % 2 == 0:
# if the draw date is even then the day is a Friday/Saturday
ws.append([int(draw_and_date[0][-4:]), (days[d+2] + draw_and_date[0][:12]), numbers[0], numbers[1], numbers[2], numbers[3], numbers[4], mega]) # print the draw date
else:
# if the draw date is odd then the day is a Tuesday/Wednesday
ws.append([int(draw_and_date[0][-4:]), (days[d] + draw_and_date[0][:12]), numbers[0], numbers[1], numbers[2], numbers[3], numbers[4], mega])
y -= 1 #decrement y by 1 to get the next missing draw
print("Updated the " + sheetnames[x-1] + " table successfully!")
x -= 1 #decrement x by 1 to move on to the next table
wb.save("Lottery databases.xlsx") #save the workbook
print("Saved the database Sucessfully!")
and so on...

Related

How do I make it look like the picture in Lua

This is my code
local level = 5
for i = 1, level do
local text = ""
for j = 1, i do
text = text..""
end
for j = 1, level-i, 1 do
text = text.." "
end
for j = 1+level, level+(level-i) do
text = text.." "
end
for j = 1, level + i-level do
text = text..""
end
print(text)
end
I want the result to be similar to the one in the picture.
Here is what your code looks like with proper formatting.
local level = 5
for i = 1, level do
local text = ""
for j = 1, i do
text = text..""
end
for j = 1, level-i, 1 do
text = text.." "
end
for j = 1+level, level+(level-i) do
text = text.." "
end
for j = 1, level + i-level do
text = text..""
end
print(text)
end
Your current code prints... well... an empty string. You haven't yet added the characters it's to display to be on par with the image.
The amount of characters per row is 9. So you ideally need 9 characters per row. You will also be incrementing the number once per row. The amount of characters per row also increases by 2; one on each side.
We can use the string.rep(string, number) function to duplicate a 'string' 'number' times. You can feed in your current level into that so it generates 1 2 or 3 depending on the line the number of times. Then you have whitespace to worry about. You can use string.rep again along with a bit of distance math to calculate the amount of whitespace you need from what you take up. Then finally throw everything together concatenated trailing with the first string and print.
local levels = 5
local columns = 9
for i=1, levels do
local str = string.rep(i, i)
local padding = columns - (#str * 2) + 1
print(str .. string.rep(" ", padding) .. str)
end

how to optimize my MIP model to find the best combination of columns and rows with minimum of NAN values?

I have a dataframe with lots of NAN values. My objective is to find the best conbination of columns and rows to maximize my data and minimize the NAN values. One solution I found is to use the ompr package and create a MIP model to solve the problem.
Here's the model :
m <- +!is.na(M) # gets logical matrix; 0 if NA else 1
nr <- nrow(m)
nc <- ncol(m)
n_years <- 17
model <- MIPModel() %>%
# keep[i,j] is 1 if matrix cell [i,j] is to be kept else 0
add_variable(keep[i,j], i = 1:nr, j = 1:nc, typ = "binary") %>%
# rm_row[i] is 1 if row i is selected for removal else 0
add_variable(rm_row[i], i = 1:nr, type = "binary") %>%
# rm_col[j] is 1 if column j is selected for removal else 0
add_variable(rm_col[j], j = 1:nc, type = "binary") %>%
# maximize good cells kept
set_objective(sum_expr(keep[i,j], i = 1:nr, j = 1:nc), "max") %>%
# cell can be kept only when row is not selected for removal
add_constraint(sum_expr(keep[i,j], j = 1:nc) <= 1 - rm_row[i], i = 1:nr) %>%
# cell can be kept only when column is not selected for removal
add_constraint(sum_expr(keep[i,j], i = 1:nr) <= 1 - rm_col[j], j = 1:nc) %>%
# only non-NA values can be kept
add_constraint(m[i,j] + rm_row[i] + rm_col[j] >= 1, i = 1:nr, j = 1:nc) %>%
# keep at most n columns i.e. remove at least (nc - n_years) columns
add_constraint(sum_expr(rm_col[j], j = 1:nc) >= nc - n_years)
model
This model works just fine as long as my data is not huge which is not the case for my data (3500 rows x 180 columns)
this model has a lot of contsraints which is why it takes too long to solve. Is there another way to recreate this model so it has less constraints and calculates faster?

VBA SUMIF on Array

OK, so I am wanting to do a sumif on a column on an array because I don't want to print to the worksheet in order to obtain a range type for a Worksheet.function.Sumif, the idea is to stay completely out in VBA code and write to the worksheet as little as possible. I am trying to optimize for speed: 4814 rows by 40 columns X 60.
The first column total is total of 197,321,164 is correct, the next columns are low and instead of going to quarter 40 the Else kicks in and everything after 8 is 0. The first "To_Quarter" in the array is 9 so with the >= I would think it would go to 9. I tried putting my Next I before the End IF but then it just asks for the For.
image of locals box: https://ibb.co/cgFQhxY
Any help would be much appreciated.
Sub SumifONarray()
Dim arrQuarters, arrNumber_of_Assets As Variant
Dim I As Long, J As Long
arrNumber_of_Assets = Range("Costs_Number_of_Assets")
arrQuarters = Range("Quarters_1to40")
Dim MaxRecov_If_result, arr_Row10_Resolution_Physical_Possession_Expenses_To_Quarter, arr_Row10_Resolution__Max_Recovery_Grid As Variant
arr_Row10_Resolution_Physical_Possession_Expenses_To_Quarter = Range("_Row10_Resolution_Physical_Possession_Expenses_To_Quarter")
arr_Row10_Resolution__Max_Recovery_Grid = Range("_Row10_Resolution__Max_Recovery_Grid")
ReDim arrIf_Max_Recovery_Amount_Sum(1 To 1, 1 To UBound(arrQuarters, 2))
For J = LBound(arrQuarters, 2) To UBound(arrQuarters, 2)
For I = LBound(arrNumber_of_Assets, 1) To UBound(arrNumber_of_Assets, 1)
If arr_Row10_Resolution_Physical_Possession_Expenses_To_Quarter(I, 1) >= arrQuarters(1, J) Then
MaxRecov_If_result = MaxRecov_If_result + arr_Row10_Resolution__Max_Recovery_Grid(I, J)
Else: MaxRecov_If_result = 0
End If
Next I
arrIf_Max_Recovery_Amount_Sum(1, J) = MaxRecov_If_result
MaxRecov_If_result = 0
Next J
End Sub
I've uploaded a sample below with code with 10 rows.
https://easyupload.io/wfixds

unsupervised learning how to get number of clusters

In this code below the author says that -
"Before I begin the kmeans clustering I want to use a hierarchial clustering to figure how many clusters I should have. I truncated the dendrogram because if I didn't the dendrogram will be hard to read. I cut at 20 because it has the second biggest distance jump (the first big jump is at 60). After the cut there are 7 clusters."
I am not able to see in the Dendrogram how he arrived at the numbers he mentioned - 20, 60 or 7
I am attaching the dendrogram that I have got from the sample data taken from his github example and am wondering if anyone can shed light on how he arrived at the numbers 20, 60 or 7
he also says "Let's fit k-means on the matrix with a range of clusters 1 - 19." where did he get that range 1 to 19 from? is it cause of the drop at 20 (or the cut off at 20)
github - https://github.com/moyphilip/SKU-Clustering
Also what would one say should be the number of clusters in this second image attached here ? 6 clusters ? (its a different dataset)
from sklearn.feature_extraction.text import TfidfVectorizer
import os
import pandas as pd
import re
import numpy as np
df = pd.read_csv('sample-data.csv')
def split_description(string):
string_split = string.split(' - ',1)
name = string_split[0]
return name
df_new = pd.DataFrame()
df_new['name'] = df.loc[:,'description'].apply(lambda x: split_description(x))
df_new['id'] = df['id']
def remove(name):
new_name = re.sub("[0-9]", '', name)
new_name = ' '.join(new_name.split())
return new_name
df_new['name'] = df_new.loc[:,'name'].apply(lambda x: remove(x))
df_new.head()
tfidf_vectorizer = TfidfVectorizer(
use_idf=True,
stop_words = 'english',
ngram_range=(1,4), min_df = 0.01, max_df = 0.8)
tfidf_matrix = tfidf_vectorizer.fit_transform(df_new['name'])
print (tfidf_matrix.shape)
print (tfidf_vectorizer.get_feature_names())
from sklearn.metrics.pairwise import cosine_similarity
dist = 1.0 - cosine_similarity(tfidf_matrix)
print (dist)
from scipy.cluster.hierarchy import ward, dendrogram
#run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
linkage_matrix = ward(dist) #define the linkage_matrix using ward clustering pre-computed distances
fig, ax = plt.subplots(figsize=(15, 20)) # set size
ax = dendrogram(linkage_matrix,
truncate_mode='lastp', # show only the last p merged clusters
p=20, # show only the last p merged clusters
leaf_rotation=90.,
leaf_font_size=12.,
labels=list(df_new['name']))
plt.axhline(y=20, linewidth = 2, color = 'black')
fig.suptitle("Hierarchial Clustering Dendrogram Truncated", fontsize = 35, fontweight = 'bold')
#fig.show()
from sklearn.cluster import KMeans
num_clusters = range(1,20)
KM = [KMeans(n_clusters=k, random_state = 1).fit(tfidf_matrix) for k in num_clusters]
# Let's plot the within cluster sum of squares for each k to see which k I should choose.
#
# The plot shows a steady decline from from 0 to 19. Since the elbow rule does not apply for this I will choose k = 7 because of the previous dendrogram.
# In[17]:
import matplotlib.pyplot as plt
#get_ipython().run_line_magic('matplotlib', 'inline')
with_in_cluster = [KM[k].inertia_ for k in range(0,len(num_clusters))]
plt.plot(num_clusters, with_in_cluster)
plt.ylim(min(with_in_cluster)-1000, max(with_in_cluster)+1000)
plt.ylabel('with-in cluster sum of squares')
plt.xlabel('# of clusters')
plt.title('kmeans within ss for k value')
plt.show()
# I add the cluster label to each record in df_new
# In[18]:
model = KM[6]
clusters = model.labels_.tolist()
df_new['cluster'] = clusters
# Here is the distribution of clusters. Cluster 0 has a records, then cluster 1. Cluster 2 - 4 seem pretty even.
# In[19]:
df_new['cluster'].value_counts()
# I print the top terms per cluster and the names in the respective cluster.
# In[20]:
print("Top terms per cluster:")
print
order_centroids = model.cluster_centers_.argsort()[:, ::-1]
terms = tfidf_vectorizer.get_feature_names()
for i in range(model.n_clusters):
print ("Cluster %d : " %i )
for ind in order_centroids[i, :10]:
print ( '%s' % terms[ind])
print
print ("Cluster %d names:" %i)
for idx in df_new[df_new['cluster'] == i]['name'].sample(n = 10):
print ( ' %s' %idx)
print
print
# I reduce the dist to 2 dimensions with MDS. The dissimilarity is precomputed because we provide 1 - cosine similarity. Then I assign the x and y variables.
# In[21]:
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.manifold import MDS
mds = MDS(n_components=2, dissimilarity="precomputed", random_state=1)
pos = mds.fit_transform(dist)
xs, ys = pos[:, 0], pos[:, 1]
# In[22]:
cluster_colors = {0: '#85C1E9', 1: '#FF0000', 2: '#800000', 3: '#04B320',
4: '#6033FF', 5: '#33FF49', 6: '#F9E79F', 7: '#935116',
8: '#9B59B6', 9: '#95A5A6'}
cluster_labels = {0: 'vest dress print', 1: 'shirt merino island',
2: 'pants guide pants guide', 3: 'shorts board board shorts',
4: 'simply live live simply', 5: 'cap cap bottoms bottoms',
6: 'jkt zip jkt guide'}
#some ipython magic to show the matplotlib plots inline
#get_ipython().run_line_magic('matplotlib', 'inline')
#create data frame that has the result of the MDS plus the cluster numbers and titles
df_plot = pd.DataFrame(dict(x=xs, y=ys, label=clusters, name=df_new['name']))
#group by cluster
groups = df_plot.groupby('label')
# set up plot
fig, ax = plt.subplots(figsize=(17, 9)) # set size
for name, group in groups:
ax.plot(group.x, group.y, marker='o', linestyle='', ms=12,
label = cluster_labels[name],
color = cluster_colors[name])
ax.set_aspect('auto')
ax.legend(numpoints = 1)
fig.suptitle("SKU Clustering", fontsize = 35, fontweight = 'bold')
#plt.show()

Tooltip of highchart is not displaying properly

I am trying to generate column chart of highcharts using rchart library. Spline and pie chart worked me fine. But column chart's tooltip is not rendering properly
I am using following code to generate it
res <- getForumNumDiscussionsData()
h1 <- Highcharts$new()
h1$chart(type = "column")
res <- getForumNumDiscussionsData()
h1 <- Highcharts$new()
h1$chart(type = "column")
#axis
h1$xAxis(title=list(text="Number of Discussions"),categories=res$categories)
h1$yAxis(title=list(text="Number of students"))
#plot options
h1$plotOptions(column=list(pointPadding=0,borderWidth=0))
#series
h1$series(data = res$count,name="students")
#export
h1$exporting(sourceWidth = 1000, sourceHeight = 400)
and data frame : res
categories count
1 < 13 12
2 < 24 121
3 < 39 13
Can anybody help me to render tooltip text properly

Resources