# Lbasara's picture
# Update app.py
# b338715 verified
import json
import pickle
import pandas as pd
import panel as pn
import geopandas as gpd
from shapely import Point
import folium
from branca import colormap as cm
import model2vec
from model2vec import StaticModel
from vicinity import Vicinity, Backend, Metric
from sklearn.decomposition import TruncatedSVD
# Distance thresholds: a neighbour returned by the similarity indexes is kept
# only when its distance to the query is strictly below these values.
proximax_emploi = 0.65      # job-offer skills
proximax_formation = 0.68   # training certifications

# Canton geometries; the "nom" column is exposed under the name "canton".
gcantons = gpd.read_file("cantons-normandie.geojson")
gcantons = gcantons.rename(columns={"nom": "canton"})

# Spatial index over the canton geometries, used for nearest-canton lookups.
si = gcantons.sindex
def assigne_canton(row):
    """Return the name of the canton nearest to the coordinates of ``row``.

    ``row`` must expose ``longitude`` and ``latitude`` attributes. If either
    one is missing (``pd.isna``), no lookup is attempted and ``None`` is
    returned. Otherwise the spatial index ``si`` is queried with the point
    ``(longitude, latitude)`` and the ``canton`` value of the matching row of
    ``gcantons`` is returned.
    """
    x, y = row.longitude, row.latitude
    coordonnees_manquantes = pd.isna(x) or pd.isna(y)
    if coordonnees_manquantes:
        return None
    voisins = si.nearest(Point(x, y))
    # Row 1 of the result holds positions in gcantons; keep the first hit.
    position = voisins[1][0]
    return gcantons.iloc[position]['canton']
# Job-offer skills: only the canton and the skill label (renamed to
# "Compétence") are kept.
dfcomp = (
    pd.read_csv("offres_emploi_competences_cantons.csv")
    .rename(columns={"competencelibelle": "Compétence"})
)[["canton", "Compétence"]]

# Training offers; coordinate columns are renamed to latitude / longitude.
dfform = (
    pd.read_csv("formations_en_normandie.csv", index_col=0)
    .rename(columns={"y_latitude": "latitude", "x_longitude": "longitude"})
)

# Distance table whose columns are selected by canton name; values are cast
# to integers. NOTE(review): the unit of these distances is not visible here.
dfdist = pd.read_csv("cantons_dist.csv", index_col=0).astype(int)

# Static sentence-embedding model used to encode items and queries.
potion128 = "minishlab/potion-multilingual-128M"
model = StaticModel.from_pretrained(potion128)

# NOTE(review): pickle.load can execute arbitrary code; this is only safe as
# long as sklearn_svd.pkl is a trusted artefact shipped with the application.
with open("sklearn_svd.pkl", "rb") as f:
    svd = pickle.load(f)
def encode_and_project(query: str):
    """Embed ``query`` with ``model`` and project it with the fitted ``svd``.

    A single string is wrapped in a one-element list before encoding; any
    other value is passed to ``model.encode`` unchanged. The return value is
    whatever ``svd.transform`` produces for the encoded batch.
    """
    textes = [query] if isinstance(query, str) else query
    return svd.transform(model.encode(textes))
certcol = "intitule_certification"

# Distinct certification titles and distinct skill labels. Both lists are
# built the same way (first-seen order) so the index contents are reproducible
# from one run to the next; missing certification titles are skipped so that
# only real text is sent to the encoder.
items_form = dfform[certcol].dropna().drop_duplicates().to_list()
items_emploi = dfcomp.Compétence.drop_duplicates().to_list()

# Nearest-neighbour index over job-offer skills (cosine metric).
vice = Vicinity.from_vectors_and_items(
    vectors=model.encode(items_emploi),
    items=items_emploi,
    backend_type=Backend.USEARCH,
    metric=Metric.COSINE,
)

# Nearest-neighbour index over training certifications (cosine metric).
vicf = Vicinity.from_vectors_and_items(
    vectors=model.encode(items_form),
    items=items_form,
    backend_type=Backend.USEARCH,
    metric=Metric.COSINE,
)

# Free-text search box driving the map.
query = pn.widgets.TextInput(name="Décrire la compétence recherchée et presser 'Entrée'")

# Score indicator out of 10; created hidden and revealed once a query has
# been processed.
score = pn.indicators.Number(
    name="Score d'adéquation",
    value=2,
    visible=False,
    format="{value}/10",
    title_size='10pt',
    font_size='30pt',
    colors=[(3, 'red'), (7, 'orange'), (10, 'green')],
)
def calcul_score(dft):
    """Score from 0 to 10 how well training locations cover the cantons.

    Each row of ``dft`` is assigned its nearest canton with
    ``assigne_canton``. For every row of ``dfdist`` the smallest distance to
    one of those cantons is taken, the minima are summed, and the score is
    ``10 - total // 2000`` clamped to the range 0..10.

    Args:
        dft: DataFrame of training offers whose rows expose ``longitude`` and
            ``latitude``. A ``canton`` column is added to it in place, as the
            original implementation did.

    Returns:
        An ``int`` between 0 and 10. When no row can be assigned a canton
        (empty frame or only missing coordinates) the score is 0: with no
        reachable training, the empty selection would otherwise sum to 0 and
        produce the maximum score.
    """
    dft['canton'] = [assigne_canton(ligne) for _, ligne in dft.iterrows()]

    # Rows without coordinates get no canton; None is not a valid column key
    # in dfdist, so those rows are left out. Duplicates do not change a min.
    cantons = dft['canton'].dropna().unique()
    if len(cantons) == 0:
        return 0

    dist = dfdist[list(cantons)].min(axis=1).sum()
    return int(min(10, max(0, 10 - dist // 2000)))
def carte(col):
    """Render the map pane for the text currently typed in ``query``.

    With an empty query a plain base map is returned and the score indicator
    is hidden. Otherwise the query is embedded with ``model`` and used to:

    * select the job-offer skills closer than ``proximax_emploi`` (among the
      200 nearest neighbours in ``vice``) and colour each canton by its number
      of matching offers;
    * select the certifications closer than ``proximax_formation`` (among the
      50 nearest neighbours in ``vicf``) and add one marker per distinct
      training location;
    * update and show the ``score`` indicator using ``calcul_score``.

    Args:
        col: Value supplied by ``pn.bind``. It is not read here; the text is
            taken from ``query.value``.

    Returns:
        A ``pn.pane.plot.Folium`` pane (height 650, width 1024) wrapping the
        map.
    """
    req = query.value
    if not req:
        # Nothing to search for: fixed default view, no score shown.
        score.visible = False
        m = folium.Map(location=[49.124854, -0.0730575], zoom_start=8, tiles="CartoDB positron")
        return pn.pane.plot.Folium(m, height=650, width=1024)

    test_emb = model.encode(req)

    # --- Job offers: count matching skills per canton ----------------------
    selcol = [nom for (nom, dist) in vice.query(test_emb, k=200)[0] if dist < proximax_emploi]
    dfselcol = dfcomp[dfcomp["Compétence"].isin(selcol)]
    dfa = dfselcol.groupby("canton").agg(
        total=("Compétence", len),
        # Per-canton breakdown rendered as HTML lines for the tooltip.
        compétence=("Compétence", lambda x: x.value_counts().to_string(header=False).replace("\n", "<br>")),
    ).reset_index()
    # Right merge keeps every canton, including those without any offer.
    gdet = gpd.GeoDataFrame(dfa.merge(gcantons, how='right')).rename(columns={"total": "total offres"})
    m = gdet.explore(column="total offres", tooltip=["canton", "compétence", "total offres"],
                     cmap="viridis", vmax=10, tiles="CartoDB positron")

    # --- Training offers: one marker per distinct location -----------------
    res_form = [nom for (nom, dist) in vicf.query(test_emb, k=50)[0] if dist < proximax_formation]
    dft = pd.DataFrame(res_form, columns=[certcol]).merge(dfform).drop_duplicates(subset=["latitude", "longitude"])
    # Offers without coordinates can neither be drawn (folium rejects NaN
    # locations) nor be attached to a canton, so they are left out.
    dft = dft.dropna(subset=["latitude", "longitude"])
    for _, row in dft.iterrows():
        folium.Marker(
            location=[row['latitude'], row['longitude']],
            tooltip=f"<b>{row['intitule_certification']}</b><p>{row['intitule_formation']}",
            icon=folium.Icon(color="red", prefix="fa", icon="fa-university")
        ).add_to(m)

    score.value = calcul_score(dft)
    score.visible = True
    return pn.pane.plot.Folium(m, height=650, width=1024)
# Markdown shown in the modal dialog opened when the application starts.
modal_text = """# Compétences à la carte.
Veuillez rechercher une compétence dans le champ associé.
La carte montrera pour la région Normandie les offres d'emploi de sens associé, ainsi que les offres de formation correspondantes.
"""

# Reactive binding: carte is called again whenever the query widget changes.
lien = pn.bind(carte, col=query)

# Page layout: the search box and score on one row, followed by the map.
app = pn.template.MaterialTemplate(
    title='Compétences à la carte',
    header='par Solo³',
    main=pn.FlexBox(
        pn.Row(query, score),
        lien,
        height=800,
    ),
    modal=modal_text,
)

app.open_modal()
app.servable()