File size: 4,513 Bytes
3aabbc1
 
 
b5523d2
3aabbc1
 
 
 
 
 
 
 
b5523d2
 
dfe37e1
 
 
3aabbc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57716f9
3aabbc1
 
 
b5523d2
 
3aabbc1
57716f9
3aabbc1
 
 
b5523d2
 
98e97b5
3aabbc1
 
 
 
 
 
 
 
 
 
 
b338715
3aabbc1
 
 
 
 
 
 
 
57716f9
dfe37e1
3aabbc1
 
 
 
14a0be1
c1bc977
3aabbc1
 
dfe37e1
3aabbc1
 
 
 
 
 
 
 
 
cedd1d6
3aabbc1
 
 
14a0be1
3aabbc1
 
 
05c6005
 
 
 
 
 
 
 
 
98e97b5
05c6005
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import json
import pickle
import pandas as pd
import panel as pn
import geopandas as gpd
from shapely import Point
import folium
from branca import colormap as cm
import model2vec
from model2vec import StaticModel
from vicinity import Vicinity, Backend, Metric
from sklearn.decomposition import TruncatedSVD


# Distance cut-offs applied to similarity-search results in `carte`: a match is
# kept only when its distance is strictly below the threshold.
proximax_emploi=0.65
proximax_formation=0.68

# Canton geometries of the region; the "nom" column is renamed to "canton" so
# it shares its name with the "canton" column of the other tables.
gcantons=gpd.read_file("cantons-normandie.geojson").rename(columns={"nom": "canton"})
# Spatial index over the canton geometries, used for nearest-canton lookups.
si=gcantons.sindex
def assigne_canton(row):
    """Return the name of the canton nearest to a row's coordinates.

    Args:
        row: Object exposing ``longitude`` and ``latitude`` attributes, e.g. a
            DataFrame row received through ``DataFrame.apply(axis=1)``.

    Returns:
        The ``canton`` value of the nearest entry of ``gcantons``, or ``None``
        when either coordinate is missing.
    """
    longitude, latitude = row.longitude, row.latitude
    coords_missing = pd.isna(longitude) or pd.isna(latitude)
    if coords_missing:
        return None
    matches = si.nearest(Point(longitude, latitude))
    # Second row of the result: positions within the indexed geometries.
    nearest_position = matches[1][0]
    return gcantons.iloc[nearest_position]['canton']


# Job-offer skills per canton (going by the file name); only the "canton"
# column and the skill label, renamed to "Compétence", are kept.
dfcomp=pd.read_csv("offres_emploi_competences_cantons.csv")\
    .rename(columns={"competencelibelle": "Compétence"})[["canton", "Compétence"]]


# Training offers; the coordinate columns are renamed to "latitude"/"longitude".
dfform=pd.read_csv("formations_en_normandie.csv", index_col=0)\
    .rename(columns={"y_latitude": "latitude", "x_longitude": "longitude"})
# Name of the column holding the certification title.
certcol="intitule_certification"

# Distinct certification titles (going through a set, so order is arbitrary).
items_form= list(set(dfform[certcol]))

# Distance table cast to integers; presumably a canton-by-canton matrix whose
# columns are canton names (units not visible here) — TODO confirm.
dfdist=pd.read_csv("cantons_dist.csv", index_col=0).astype(int)

# Identifier of the pretrained static embedding model.
potion128="minishlab/potion-multilingual-128M"
model = StaticModel.from_pretrained(potion128)

# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only ship a trusted "sklearn_svd.pkl".
with open("sklearn_svd.pkl", "rb") as f:
    svd=pickle.load(f)

def encode_and_project(query: str):
    """Embed one or several texts and project them with the loaded SVD.

    Args:
        query: A single string, which is wrapped into a one-element list, or
            any other value, which is handed to ``model.encode`` unchanged.

    Returns:
        The output of ``svd.transform`` applied to the encoded texts.
    """
    texts = [query] if isinstance(query, str) else query
    return svd.transform(model.encode(texts))

# NOTE: `certcol` and `items_form` are defined once, right after `dfform` is
# loaded; the identical recomputation that used to sit here was redundant.

# Distinct skill labels found in the job offers.
items_emploi = dfcomp.Compétence.drop_duplicates().to_list()

# Similarity index over the job-offer skill labels (cosine metric).
vice = Vicinity.from_vectors_and_items(
    vectors=model.encode(items_emploi),
    items=items_emploi,
    backend_type=Backend.USEARCH,
    metric=Metric.COSINE,
)

# Similarity index over the certification titles (cosine metric).
vicf = Vicinity.from_vectors_and_items(
    vectors=model.encode(items_form),
    items=items_form,
    backend_type=Backend.USEARCH,
    metric=Metric.COSINE,
)

# Free-text search field; its value drives the map through `pn.bind`.
query = pn.widgets.TextInput(name="Décrire la compétence recherchée et presser 'Entrée'")


# Score indicator, created hidden; `carte` shows it and sets its value once a
# search has been run.
score = pn.indicators.Number(
    name="Score d'adéquation", value=2, visible=False,
    format="{value}/10",
    title_size='10pt', font_size='30pt',
    colors=[(3, 'red'), (7, 'orange'), (10, 'green')],
)


def calcul_score(dft):
    """Compute the 0-10 adequacy score for a set of matched trainings.

    Every training is assigned to a canton with ``assigne_canton`` (stored in
    a ``canton`` column added to ``dft``). For each row of ``dfdist`` the
    smallest value among the columns of those cantons is taken, the minima are
    summed, and one point is removed from 10 per 2000 units of that sum.

    Args:
        dft: DataFrame of trainings exposing ``latitude`` and ``longitude``
            columns. It is modified in place when it is not empty.

    Returns:
        int: Score clamped to the range 0-10. ``0`` is returned when no
        training can be placed in a canton: selecting no column would make
        the summed distance 0 and wrongly yield the maximum score.
    """
    if dft.empty:
        return 0
    dft['canton'] = dft.apply(assigne_canton, axis=1)
    # Trainings without coordinates have no canton and cannot be looked up
    # among the columns of the distance table.
    cantons = dft['canton'].dropna().unique()
    if len(cantons) == 0:
        return 0
    dist = dfdist[cantons].min(axis=1).sum()
    return int(min(10, max(0, 10 - dist // 2000)))


def carte(col):
    """Build the map pane matching the current search query.

    With an empty (or blank) query, a plain base map is returned and the score
    indicator is hidden. Otherwise the query is embedded once and used to:

    * select the job-offer skills whose distance is below ``proximax_emploi``
      and draw, per canton, the number of matching offers;
    * select the certifications whose distance is below
      ``proximax_formation`` and add one marker per distinct training
      location;
    * update and show the ``score`` indicator.

    Args:
        col: Bound value of the ``query`` widget. It only serves to trigger a
            refresh through ``pn.bind``; the text itself is read from
            ``query.value``.

    Returns:
        pn.pane.plot.Folium: Pane wrapping the generated folium map.
    """
    req = query.value
    # A blank query carries nothing to search for: show the default map.
    if not req or not req.strip():
        score.visible = False
        m = folium.Map(location=[49.124854, -0.0730575], zoom_start=8,
                       tiles="CartoDB positron")
        return pn.pane.plot.Folium(m, height=650, width=1024)

    test_emb = model.encode(req)

    # Job offers: keep the skills close enough to the query, count per canton.
    selcol = [nom for (nom, dist) in vice.query(test_emb, k=200)[0]
              if dist < proximax_emploi]
    dfselcol = dfcomp[dfcomp["Compétence"].isin(selcol)]
    dfa = dfselcol.groupby("canton").agg(
        total=("Compétence", len),
        compétence=(
            "Compétence",
            lambda x: x.value_counts().to_string(header=False).replace("\n", "<br>"),
        ),
    ).reset_index()
    # Right merge so that cantons without any matching offer are still drawn.
    gdet = gpd.GeoDataFrame(dfa.merge(gcantons, how='right'))\
        .rename(columns={"total": "total offres"})
    m = gdet.explore(column="total offres",
                     tooltip=["canton", "compétence", "total offres"],
                     cmap="viridis", vmax=10, tiles="CartoDB positron")

    # Trainings: keep the certifications close enough to the query.
    res_form = [nom for (nom, dist) in vicf.query(test_emb, k=50)[0]
                if dist < proximax_formation]
    dft = (
        pd.DataFrame(res_form, columns=[certcol])
        .merge(dfform)
        # A marker cannot be placed at a NaN location, so trainings without
        # coordinates are left out of the map and of the score.
        .dropna(subset=["latitude", "longitude"])
        .drop_duplicates(subset=["latitude", "longitude"])
    )

    for _, row in dft.iterrows():
        folium.Marker(
            location=[row['latitude'], row['longitude']],
            tooltip=f"<b>{row['intitule_certification']}</b><p>{row['intitule_formation']}",
            icon=folium.Icon(color="red", prefix="fa", icon="fa-university"),
        ).add_to(m)

    score.value = calcul_score(dft)
    score.visible = True

    return pn.pane.plot.Folium(m, height=650, width=1024)

# Bind `carte` to the `query` widget; the widget is passed as the `col`
# argument.
lien=pn.bind(carte, col=query)

# Markdown content of the template's modal (user-facing text).
modal_text="""# Compétences à la carte.

Veuillez rechercher une compétence dans le champ associé.
La carte montrera pour la région Normandie les offres d'emploi de sens associé, ainsi que les offres de formation correspondantes.
"""

# Page layout: search field and score on one row, followed by the bound map.
app = pn.template.MaterialTemplate(
    title='Compétences à la carte',
    header='par Solo³',
    main=pn.FlexBox(pn.Row(query, score), lien, height=800),
    modal=modal_text,
    
)
# Request the modal (the introduction text above) to be opened.
app.open_modal()

# Mark the template as servable (e.g. for `panel serve`).
app.servable();