Plotly Choropleth not showing boundaries of geojson file - geojson

I'm trying to show regions within the UK from the following geojson file: https://data.nationalgrideso.com/system/gis-boundaries-for-gb-dno-license-areas
However I can't seem to get any results displayed, besides the scale and mapbox frame. Here's my test code below:
import os, json
import numpy as np
import pandas as pd
import plotly.express as px
# dataframe: one random value for each of the 14 feature IDs to be coloured
d = {'Value': np.random.randint(1000, size=14),
     'geojson_id': [17, 22, 21, 12, 19, 10, 20, 18, 13, 16, 15, 23, 11, 14]}
df = pd.DataFrame(data=d)

# geojson file, read from the directory that contains this script
basedir = os.path.abspath(os.path.dirname(__file__))
filename = os.path.join(basedir, 'dno_license_areas_20200506.geojson')
with open(filename) as response:
    dno_zones = json.load(response)

# Choropleth PLOT: dataframe rows are matched to features via properties.ID.
# NOTE(review): the geometry is handed to Mapbox exactly as stored in the
# file; nothing in this script converts its coordinates.
fig = px.choropleth_mapbox(df, geojson=dno_zones, color='Value',
                           locations='geojson_id', featureidkey="properties.ID",
                           center={"lat": 54.3, "lon": 0.0},
                           mapbox_style="carto-positron", zoom=4)
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()
What am I missing?

When you look at the GeoJSON file, you will find that it uses this CRS:
<Derived Projected CRS: EPSG:27700>
Name: OSGB36 / British National Grid
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: United Kingdom (UK) - offshore to boundary of UKCS within 49°45'N to 61°N and 9°W to 2°E; onshore Great Britain (England, Wales and Scotland). Isle of Man onshore.
- bounds: (-9.0, 49.75, 2.01, 61.01)
Coordinate Operation:
- name: British National Grid
- method: Transverse Mercator
Datum: Ordnance Survey of Great Britain 1936
- Ellipsoid: Airy 1830
- Prime Meridian: Greenwich
That means you need to reproject it to a CRS that Mapbox can use (the code below converts it to EPSG:4326, i.e. longitude/latitude).
I have used geopandas for this:
import os, json, requests
import numpy as np
import pandas as pd
import plotly.express as px
import geopandas as gpd
# dataframe: one random value for each of the 14 feature IDs to be coloured
d = {
    "Value": np.random.randint(1000, size=14),
    "geojson_id": [17, 22, 21, 12, 19, 10, 20, 18, 13, 16, 15, 23, 11, 14],
}
df = pd.DataFrame(data=d)

# Local-file variant from the question, kept for reference:
# #geojson file
# basedir = os.path.abspath(os.path.dirname(__file__))
# filename= os.path.join(basedir, 'dno_license_areas_20200506.geojson')
# with open(filename) as response:
#     dno_zones = json.load(response)

# Download the GeoJSON. The timeout stops the script from hanging forever on
# a stalled connection, and raise_for_status() surfaces HTTP errors here
# instead of letting an error page reach .json().
response = requests.get(
    "https://data.nationalgrideso.com/backend/dataset/0e377f16-95e9-4c15-a1fc-49e06a39cfa0/resource/e96db306-aaa8-45be-aecd-65b34d38923a/download/dno_license_areas_20200506.geojson",
    timeout=30,
)
response.raise_for_status()
dno_zones = response.json()

# Build a GeoDataFrame using the CRS name declared inside the file, reproject
# it to EPSG:4326, and turn it back into a GeoJSON-like mapping for plotly.
dno_zones = (
    gpd.GeoDataFrame.from_features(
        dno_zones, crs=dno_zones["crs"]["properties"]["name"]
    )
    .to_crs("epsg:4326")
    .__geo_interface__
)

# Choropleth PLOT: dataframe rows are matched to features via properties.ID.
fig = px.choropleth_mapbox(
    df,
    geojson=dno_zones,
    color="Value",
    locations="geojson_id",
    featureidkey="properties.ID",
    center={"lat": 54.3, "lon": 0.0},
    mapbox_style="carto-positron",
    zoom=4,
)
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()

Related

How to solve "ValueError: Size is invalid. Valid font size are xx-small, x-small, small, medium, large, x-large, xx-large, larger, smaller, None"?

I used
import matplotlib as mpl
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
t = ax.text(0.5, 0.5, 'Text')
# The named font sizes listed in the ValueError message.
fonts = ['xx-small', 'x-small', 'small', 'medium', 'large',
         'x-large', 'xx-large', 'larger', 'smaller']
# Apply each named size to the text object and print the numeric size that
# get_fontsize() reports for it.
for font in fonts:
    t.set_fontsize(font)
    print(font, round(t.get_fontsize(), 2))
plt.close()
The output was:
xx-small 5.79
x-small 6.94
small 8.33
medium 10.0
large 12.0
x-large 14.4
xx-large 17.28
larger 12.0
smaller 8.33
Then I ran my histogram code below and got the error:
import matplotlib.pyplot as plt
# Put your code here to create the plot
# %matplotlib inline
# Draw a 50-bin histogram of the 'tripduration' column on a 10x5 figure.
fig = plt.figure(figsize = (10, 5))
ax = fig.gca()
# NOTE(review): in pandas' hist(), xlabelsize / ylabelsize are documented as
# label *sizes*, not label text, so passing 'Seconds' and 'Number of rides'
# is presumably what raises the "Size is invalid" ValueError. Axis titles
# would normally be set with ax.set_xlabel(...) / ax.set_ylabel(...) instead.
Plot_file['tripduration'].hist(ax = ax, alpha = 0.75, xlabelsize='Seconds', ylabelsize= 'Number of rides', bins=50)
Can anyone help me solve this error, please?

How to plot decision boundaries for Random Forest classifier

How to go about plotting the decision boundaries for a Random Forest analysis with 10 classes?
I get the error:
ValueError: X has 2 features, but RandomForestClassifier is expecting
240 features as input.
Can you help me get the decision boundaries for the 10 classes if possible? Thanks for your time!
Here is my code:
from sklearn.datasets import make_classification
import seaborn as sns
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
f, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(20, 8))

# Generate noisy Data
num_trainsamples = 500
num_testsamples = 50
# Training set: 500 samples, 240 features, 10 classes.
X_train, y_train = make_classification(n_samples=num_trainsamples,
                                       n_features=240,
                                       n_informative=9,
                                       n_redundant=0,
                                       n_repeated=0,
                                       n_classes=10,
                                       n_clusters_per_class=1,
                                       class_sep=9,
                                       flip_y=0.2,
                                       #weights=[0.5,0.5],
                                       random_state=17)
# NOTE(review): n_features is set from num_testsamples (50) here, so the test
# set has a different number of columns than X_train (240) -- confirm intent.
X_test, y_test = make_classification(n_samples=50,
                                     n_features=num_testsamples,
                                     n_informative=9,
                                     n_redundant=0,
                                     n_repeated=0,
                                     n_classes=10,
                                     n_clusters_per_class=1,
                                     class_sep=10,
                                     flip_y=0.2,
                                     #weights=[0.5,0.5],
                                     random_state=17)

model = RandomForestClassifier()
# NOTE(review): np.linspace yields floats; confirm max_depth accepts them in
# the scikit-learn version in use.
parameter_space = {
    'n_estimators': [10, 50, 100],
    'criterion': ['gini', 'entropy'],
    'max_depth': np.linspace(10, 50, 11),
}
clf = GridSearchCV(model, parameter_space, cv=5, scoring="accuracy", verbose=True)  # model
my_model = clf.fit(X_train, y_train)

# define bounds of the domain (first two feature columns only)
min1, max1 = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1
min2, max2 = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1
# define the x and y scale
x1grid = np.arange(min1, max1, 0.1)
x2grid = np.arange(min2, max2, 0.1)
# create all of the lines and rows of the grid
xx, yy = np.meshgrid(x1grid, x2grid)
# flatten each grid to a vector
r1, r2 = xx.flatten(), yy.flatten()
r1, r2 = r1.reshape((len(r1), 1)), r2.reshape((len(r2), 1))
# horizontal stack vectors to create x1,x2 input for the model
grid = np.hstack((r1, r2))
# NOTE(review): grid has 2 columns while clf was fitted on the 240-column
# X_train, which matches the reported "X has 2 features ... expecting 240".
yhat = clf.predict(grid)
# reshape the predictions back into a grid
zz = yhat.reshape(xx.shape)
# plot the grid of x, y and z values as a surface
plt.contourf(xx, yy, zz, cmap='Paired')
# create scatter plot for samples from each class
# NOTE(review): range(2) only visits classes 0 and 1 although the data has
# 10 classes -- confirm intent.
for class_value in range(2):
    # get row indexes for samples with this class; the labels for X_train are
    # y_train (the pasted code referred to an undefined name `y`)
    row_ix = np.where(y_train == class_value)
    # create scatter of these samples
    plt.scatter(X_train[row_ix, 0], X_train[row_ix, 1], cmap='Paired')

How can I change the ray_results folder when using TuneGridSearchCV?

I am running quite a large parameter search using TuneGridSearchCV on an xgboost model on my university's HPC cluster. The results are being saved to ~/ray_results; however, under the HPC policy I don't have enough space to save all the files in my home directory. How can I move ray_results to a different folder that has more space? I've looked into the documentation, but I am confused about how to do it.
My code is as follows:
import numpy as np
import pandas as pd
from pandas import MultiIndex, Int16Dtype
from sklearnex import patch_sklearn
patch_sklearn()
import xgboost as xgb
from tune_sklearn import TuneGridSearchCV
from datetime import datetime
import sys
if __name__ == "__main__":
    # Load the training data and split it into predictors and the
    # 'Response' target column.
    df_train = pd.read_excel('my_dataset.xlsx')
    train_cols = df_train.columns[df_train.columns != 'Response']
    X_train = pd.DataFrame(df_train, columns=train_cols)
    y_train = pd.DataFrame(df_train, columns=['Response'])

    # Hyperparameter grid that is searched exhaustively.
    params = {
        "n_estimators" : list(range(100, 1400, 100)),
        "max_depth" : list(range(2, 20, 2)),
        "min_child_weight" : list(range(2, 20, 2)),
        "gamma" : np.arange(0, 1.05, 0.1),
        "colsample_bytree" : np.arange(0.5, 1.05, 0.1),
        "colsample_bylevel" : np.arange(0.5, 1.05, 0.1),
        'reg_lambda': [0.1, 1.0, 5.0, 10.0, 25.0, 50.0]
    }
    xgb_model = xgb.XGBClassifier(seed=0, use_label_encoder = False, tree_method = 'hist')
    print(params)

    # 5-fold grid search scored by ROC AUC.
    grid_cv = TuneGridSearchCV(xgb_model, param_grid = params, cv = 5, n_jobs = -1, scoring='roc_auc')

    current_time = datetime.now().strftime("%H:%M:%S")
    print("Start Time =", current_time)
    print('\n')

    # ravel() flattens the single-column target frame to a 1-D array.
    grid_cv.fit(X_train, y_train.values.ravel())

    current_time = datetime.now().strftime("%H:%M:%S")
    print('End Time: ', current_time)
    print('\n\n')
    print('Grid best score (roc_auc): ')
    print(grid_cv.best_score_)
    print('\n\n')
    print('Grid best hyperparameters: ')
    print(grid_cv.best_params_)
    print('\n\n')
Alternatively, instead of creating a folder for every single parameter combination (which is what it is doing), is there a way to change the format of the output to be more space efficient?
You should be able to set this with TuneGridSearchCV(local_dir="YOUR_PATH").

I am trying to write a python code to get the perspective out of the input image using OpenCV. How to get rid of this error that comes?

This code warps the given image and detects the circular objects (checkers) in it using the Hough transform.
Input files for my code below
This is the input image used
A JSON file having the required dimensions used to calculate the perspective.
{
"canonical_board": {
"tl_tr_br_bl": [
[
622,
85
],
[
1477,
66
],
[
1420,
835
],
[
674,
837
]
],
"bar_width_to_checker_width": 0.716,
"board_width_to_board_height": 1.03,
"pip_length_to_board_height": 0.36
}
}
My code
#import necessary packages
import cv2
import json
import numpy as np
from operator import itemgetter
from glob import glob
#load file (the context manager closes the handle once the JSON is parsed)
with open('3913.jpg.info.json', 'r') as input_file:
    json_decode = json.load(input_file)

# Collect the four board entries in a fixed order and echo them.
result = []
result.append(json_decode['canonical_board']['tl_tr_br_bl'])
result.append(json_decode['canonical_board']['bar_width_to_checker_width'])
result.append(json_decode['canonical_board']['board_width_to_board_height'])
result.append(json_decode['canonical_board']['pip_length_to_board_height'])
print("tl_tr_br_bl:",result[0])
print("bar_width_to_checker_width:",result[1])
print("board_width_to_board_height",result[2])
print("pip_length_to_board_height",result[3])

normal_img = cv2.imread('3913.jpg')
pts1 = np.float32([[454, 83], [1240, 79], [1424, 808], [275, 842]])
# NOTE(review): pts2 holds three one-element rows, whereas the assertion in
# the reported error requires four 2-D points for both arguments.
pts2 = np.array([[0.397],[0.986],[0.402]], dtype=np.float32)
M = cv2.getPerspectiveTransform(pts1.astype(np.float32), pts2)
dst = cv2.warpPerspective(normal_img, M, (1300, 800))
#perspective of the original image shown
# NOTE(review): the imshow call further down passes a window name first;
# this one passes only the image -- confirm.
cv2.imshow(dst)
#converting the image into grayscale
gray = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY)
#locating the circles using hough transform
# detect circles in the image
circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1.2, 100)
# ensure at least some circles were found
if circles is not None:
    circles = np.round(circles[0, :]).astype("int")
    no_of_circles = len(circles)
    # loop over the (x, y) coordinates and radius of the circles
    # NOTE(review): `output` and `image` are not defined anywhere in this
    # snippet -- presumably copies of the warped image; confirm.
    for (x, y, r) in circles:
        cv2.circle(output, (x, y), r, (0, 255, 0), 4)
    cv2.imshow("output", np.hstack([image, output]))
    cv2.waitKey(0)
    #number of circles (only defined when at least one circle was found)
    print("number of circles detected-",no_of_circles)
Error I am getting
error Traceback (most recent call last)
<ipython-input-12-efcd2ec83d0c> in <module>
37 pts2 = np.array([[0.397],[0.986],[0.402]], dtype=np.float32)
38
---> 39 M = cv2.getPerspectiveTransform(pts1.astype(np.float32), pts2)
40
41 dst = cv2.warpPerspective(normal_img, M, (1300, 800))
error: OpenCV(4.1.2) /Users/travis/build/skvark/opencv-python/opencv/modules/imgproc/src/imgwarp.cpp:3391: error: (-215:Assertion failed) src.checkVector(2, CV_32F) == 4 && dst.checkVector(2, CV_32F) == 4 in function 'getPerspectiveTransform'
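Your pts2 array is wrong. It needs to contain four points, not three, and each point needs to be two-dimensional (x, y), not one-dimensional — for example, the four corners of the output rectangle you want the board mapped onto.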

How to config Hidden Layers in DNNClassifier

I'm new to TensorFlow and ML, and I am following this example:
https://www.tensorflow.org/get_started/tflearn
It works very well until I change the hidden_units parameter here:
# The DNNClassifier construction: three hidden layers of 10, 20 and 10 units,
# three output classes, with model files kept under /tmp/iris_model.
classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns,
hidden_units=[10, 20, 10],
n_classes=3,
model_dir="/tmp/iris_model")
When I try anything else, for example hidden_units = [20, 40, 20] or hidden_units = [20], it throws an error.
I tried to find out why on my own, but unsuccessfully so far, and thought someone here could help.
The question is: how do I choose the number of hidden layers for DNNClassifier, and why do my two examples above not work?
Here is a full code:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import urllib
import tensorflow as tf
import numpy as np
IRIS_TRAINING = "iris_training.csv"
IRIS_TRAINING_URL = "http://download.tensorflow.org/data/iris_training.csv"
IRIS_TEST = "iris_test.csv"
IRIS_TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

# Download each CSV only when it is not already present locally.
if not os.path.exists(IRIS_TRAINING):
    raw = urllib.request.urlopen(IRIS_TRAINING_URL).read()
    with open(IRIS_TRAINING, 'wb') as f:
        f.write(raw)
if not os.path.exists(IRIS_TEST):
    raw = urllib.request.urlopen(IRIS_TEST_URL).read()
    with open(IRIS_TEST, 'wb') as f:
        f.write(raw)

# Load datasets.
training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename=IRIS_TRAINING,
    target_dtype=np.int,
    features_dtype=np.float32)
test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename=IRIS_TEST,
    target_dtype=np.int,
    features_dtype=np.float32)

# Specify that all features have real-value data
feature_columns = [tf.contrib.layers.real_valued_column("", dimension=4)]

# Build 3 layer DNN with 10, 20, 10 units respectively.
# NOTE(review): model_dir is a fixed path that is reused on every run;
# presumably files saved there by an earlier run with different hidden_units
# are picked up again -- confirm when changing the layer sizes.
classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns,
                                            hidden_units=[10, 20, 10],
                                            n_classes=3,
                                            model_dir="/tmp/iris_model")


# Define the training inputs
def get_train_inputs():
    """Return the training features and targets as constant tensors."""
    x = tf.constant(training_set.data)
    y = tf.constant(training_set.target)
    return x, y


# Fit model.
classifier.fit(input_fn=get_train_inputs, steps=2000)


# Define the test inputs
def get_test_inputs():
    """Return the test features and targets as constant tensors."""
    x = tf.constant(test_set.data)
    y = tf.constant(test_set.target)
    return x, y


# Evaluate accuracy.
accuracy_score = classifier.evaluate(input_fn=get_test_inputs,
                                     steps=1)["accuracy"]
print("\nTest Accuracy: {0:f}\n".format(accuracy_score))
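Found it:
if model_dir is not specified, then the model works just fine with the new hidden_units. (Presumably the existing /tmp/iris_model directory still held a checkpoint saved with the old layer sizes, which no longer matches the new network; pointing model_dir at a fresh or empty directory should avoid the same clash.)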

Resources