-
Notifications
You must be signed in to change notification settings - Fork 85
/
dnvme_irq.c
executable file
·1404 lines (1272 loc) · 49.4 KB
/
dnvme_irq.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* NVM Express Compliance Suite
* Copyright (c) 2011, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/msi.h>
#include <linux/list.h>
#include <linux/interrupt.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/version.h>
#include "dnvme_irq.h"
/*
 * Compatibility shim: on kernels newer than 4.1.0 IRQF_DISABLED is defined
 * here as 0, so OR-ing it into the request_irq() flags used in this file
 * contributes nothing to the flag word.
 * NOTE(review): presumably those kernels no longer provide the flag - confirm
 * against the targeted kernel headers.
 */
#if LINUX_VERSION_CODE > KERNEL_VERSION(4, 1, 0)
#define IRQF_DISABLED 0
#endif
/* Static function declarations used for setting interrupt schemes. */
static int validate_irq_inputs(struct metrics_device_list
*pmetrics_device_elem, struct interrupts *irq_new,
struct msix_info *pmsix_tbl_info);
static int set_msix(struct metrics_device_list *pmetrics_device_elem,
u16 num_irqs, struct msix_info *pmsix_tbl_info);
static int set_msi_single(struct metrics_device_list *pmetrics_device_elem);
static int set_msi_multi(struct metrics_device_list *pmetrics_device_elem,
u16 num_irqs);
static int add_irq_node(struct metrics_device_list *pmetrics_device_elem,
u32 int_vec, u16 irq_no);
static void bh_callback(struct work_struct *work);
static void dealloc_all_icqs(struct irq_track *pirq_trk_list);
static int disable_active_irq(struct metrics_device_list
*pmetrics_device_elem, enum nvme_irq_type irq_active);
static void inc_isr_count(struct irq_processing *pirq_process,
u16 irq_no);
static struct irq_track *find_irq_node(
struct metrics_device_list *pmetrics_device_elem, u16 irq_no);
static struct irq_cq_track *find_icq_node(struct irq_track *pirq_node,
u16 cq_id);
static struct work_container *get_work_item(struct irq_processing
*pirq_process, u32 int_vec);
static void nvme_disable_pin(struct pci_dev *dev);
static int update_msixptr(struct metrics_device_list
*pmetrics_device_elem, u16 offset, struct msix_info *pmsix_tbl_info);
static void set_msix_mask_bit(u8 __iomem *irq_msixptr, u16 irq_no, u32 flag);
static int work_queue_init(struct irq_processing *pirq_process);
static int add_wk_item(struct irq_processing *pirq_process,
u32 int_vec, u16 irq_no);
/**
* Calls a pci_enable_msi function. Support for pci_enable_msi_block was
* dropped in 3.16 in favor of pci_enable_msi_range which was implemented in
* 3.13.x kernels (but not 3.13.0). Calls pci_enable_msi_block for 3.13.x
* kernels and earlier or otherwise calls pci_enable_msi_range.
* @param dev the pci device structure
* @param nvec the number of interrupt vectors to allocate
* @return 0 on success, <0 on error, >0 if fewer than nvec interrupts could
* be allocated.
*/
static int dnvme_pci_enable_msi(struct pci_dev * dev, unsigned int nvec);
/*
 * nvme_set_irq - install a new interrupt scheme for this device.
 *
 * Copies the request from user space, validates it, tears down whatever
 * scheme is currently active, re-creates the work queue resources and then
 * enables the requested scheme (MSI single, MSI multi, MSI-X or none).
 * The active scheme recorded for the device is only updated when the setup
 * helper reports SUCCESS.
 *
 * @pmetrics_device_elem: device whose interrupt scheme is being changed
 * @irq_new: user-space pointer to the requested struct interrupts
 *
 * Returns SUCCESS (0) on success and a negative errno on failure; a positive
 * value reported by one of the set_* helpers is passed through unchanged.
 *
 * NOTE: The controller should be disabled before setting up a new scheme;
 * validate_irq_inputs() rejects the request otherwise.
 */
int nvme_set_irq(struct metrics_device_list *pmetrics_device_elem,
    struct interrupts *irq_new)
{
    int err = SUCCESS;
    struct msix_info msix_tbl_info; /* Info for MSI-X tables */
    struct nvme_device *pnvme_dev = pmetrics_device_elem->metrics_device;
    struct interrupts *user_data = NULL;

    memset(&msix_tbl_info, 0, sizeof(struct msix_info));

    /* Allocating memory for user struct in kernel space */
    user_data = kmalloc(sizeof(struct interrupts), GFP_KERNEL);
    if (user_data == NULL) {
        LOG_ERR("Unable to alloc kernel memory to copy user data");
        err = -ENOMEM;
        goto fail_out;
    }
    if (copy_from_user(user_data, irq_new, sizeof(struct interrupts))) {
        LOG_ERR("Unable to copy from user space");
        err = -EFAULT;
        goto fail_out;
    }
    LOG_DBG("IRQ Scheme = %d", user_data->irq_type);

    /* First validate if the inputs given are correct */
    err = validate_irq_inputs(pmetrics_device_elem, user_data,
        &msix_tbl_info);
    if (err < 0) {
        LOG_ERR("Invalid inputs set or device is not disabled");
        /* Leave through fail_out so the kernel copy of the request is freed;
         * the mutex has not been taken yet. */
        goto fail_out;
    }

    /* lock onto IRQ linked list mutex as we would access the IRQ list */
    mutex_lock(&pmetrics_device_elem->irq_process.irq_track_mtx);

    /* disable the current IRQ scheme */
    err = disable_active_irq(pmetrics_device_elem, pnvme_dev->
        public_dev.irq_active.irq_type);
    if (err < 0) {
        LOG_ERR("Reset of IRQ to INT_NONE failed...");
        goto mutex_unlck;
    }

    /* initialize work queue */
    err = work_queue_init(&pmetrics_device_elem->irq_process);
    if (err < 0) {
        LOG_ERR("Failed to initialize resources for work queue/items");
        goto mutex_unlck;
    }

    /* Switch based on new irq type desired */
    switch (user_data->irq_type) {
    case INT_MSI_SINGLE: /* MSI Single interrupt settings */
        err = set_msi_single(pmetrics_device_elem);
        break;
    case INT_MSI_MULTI: /* MSI Multi interrupt settings */
        err = set_msi_multi(pmetrics_device_elem, user_data->num_irqs);
        break;
    case INT_MSIX: /* MSI-X interrupt settings */
        err = set_msix(pmetrics_device_elem, user_data->num_irqs,
            &msix_tbl_info);
        break;
    case INT_NONE: /* Set IRQ type to NONE */
        /* If here then already the IRQ scheme is none */
        break;
    default:
        LOG_ERR("Invalid Interrupt Type specified.");
        err = -EBADRQC;
        break;
    }

    /* Return value can be +ve, 0(SUCCESS) or -ve */
    if (err == SUCCESS) {
        /* Set to the new irq scheme */
        pnvme_dev->public_dev.irq_active.irq_type = user_data->irq_type;
        pnvme_dev->public_dev.irq_active.num_irqs = user_data->num_irqs;
        /* Following will only be read by ISR */
        pmetrics_device_elem->irq_process.irq_type = user_data->irq_type;
    }

    /* Fall through is intended */
mutex_unlck:
    mutex_unlock(&pmetrics_device_elem->irq_process.irq_track_mtx);
fail_out:
    /* kfree(NULL) is a no-op, so no guard is needed */
    kfree(user_data);
    return err;
}
/*
* Used for Initializing the IRQ lists before any scheme is run
* Lock on to the mutex and remove all the irq and cq track nodes.
* Also removes all the enqueued wk items
* set the current active scheme to INT_NONE.
* NOTE: This will grab the irq mutex and releases.
*/
int init_irq_lists(struct metrics_device_list
*pmetrics_device_elem, enum nvme_irq_type irq_active)
{
int err;
/* locking on IRQ MUTEX here for irq track ll access */
mutex_lock(&pmetrics_device_elem->irq_process.irq_track_mtx);
/* Initialize active irq to INT_NONE */
err = disable_active_irq(pmetrics_device_elem, pmetrics_device_elem->
metrics_device->public_dev.irq_active.irq_type);
/* Unlock IRQ MUTEX as we are done with updated irq track list */
mutex_unlock(&pmetrics_device_elem->irq_process.irq_track_mtx);
return err;
}
/*
* Used for releasing the IRQ lists after any scheme is run
* Also removes all the enqueued wk items
* set the current active scheme to INT_NONE.
*/
void release_irq(struct metrics_device_list *pmetrics_device_elem)
{
/* Disable the IRQ */
irq_disable(pmetrics_device_elem);
if (pmetrics_device_elem->irq_process.wq) {
LOG_DBG("Wait for the WQ to get flushed");
/* Flush the WQ and wait till all BH's are executed */
flush_workqueue(pmetrics_device_elem->irq_process.wq);
LOG_DBG("Destroy the recently flushed WQ");
/* Destroy the WQ */
destroy_workqueue(pmetrics_device_elem->irq_process.wq);
pmetrics_device_elem->irq_process.wq = NULL;
}
/* Note Mutex lock and unlock not required
* even though we are editing the IRQ track list
* since no more ISR's and BH's are pending
*/
/* clean up and free all IRQ linked list nodes */
deallocate_irq_trk(pmetrics_device_elem);
/*Dealloc the work list if it exists */
dealloc_wk_list(&pmetrics_device_elem->irq_process);
/* Now we can Set IRQ type to INT_NONE */
pmetrics_device_elem->metrics_device->public_dev.irq_active.
irq_type = INT_NONE;
pmetrics_device_elem->metrics_device->public_dev.irq_active.
num_irqs = 0;
/* Will only be read by ISR */
pmetrics_device_elem->irq_process.irq_type = INT_NONE;
}
/*
* The function first deallocates the IRQ linked list, then disables IRQ
* scheme sent in irq_active, finally resets active irq scheme to INT_NONE.
* Also re-initializes the irq track linked list.
* NOTE: Always call this function with IRQ MUTEX locked, otherwise it fails.
*/
static int disable_active_irq(struct metrics_device_list
*pmetrics_device_elem, enum nvme_irq_type irq_active)
{
#ifdef DEBUG
/* If mutex is not locked then exit here */
if (!mutex_is_locked(&pmetrics_device_elem->irq_process.irq_track_mtx)) {
LOG_ERR("Mutex should have been locked before this...");
/* Mutex is not locked so exiting */
return -EINVAL;
}
#endif
/* Disable the IRQ */
irq_disable(pmetrics_device_elem);
/* clean up and free all IRQ linked list nodes */
deallocate_irq_trk(pmetrics_device_elem);
/* Dealloc the work list if it exists */
dealloc_wk_list(&pmetrics_device_elem->irq_process);
/* Now we can Set IRQ type to INT_NONE */
pmetrics_device_elem->metrics_device->public_dev.irq_active.
irq_type = INT_NONE;
pmetrics_device_elem->metrics_device->public_dev.irq_active.
num_irqs = 0;
/* Will only be read by ISR */
pmetrics_device_elem->irq_process.irq_type = INT_NONE;
return SUCCESS;
}
/*
 * nvme_disable_pin - stop the controller from raising pin-based interrupts.
 *
 * Reads the word at CMD_OFFSET in PCI configuration space, sets the bit(s)
 * in PIN_INT_BIT_MASK (bit 10 of the CMD register, per the original
 * description) and writes the word back.
 * NOTE: MSI operation is not affected.
 */
static void nvme_disable_pin(struct pci_dev *dev)
{
    u16 cmd_reg;

    /* Read-modify-write: the interrupt-disable bit is SET, not cleared */
    pci_read_config_word(dev, CMD_OFFSET, &cmd_reg);
    cmd_reg = cmd_reg | PIN_INT_BIT_MASK;
    pci_write_config_word(dev, CMD_OFFSET, cmd_reg);
}
/*
 * check_cntlr_cap - look up the MSI or MSI-X capability of a controller.
 *
 * Verifies that the device advertises a capability list, then walks the
 * list looking for the capability that matches cap_type: MSIXCAP_ID for
 * INT_MSIX, MSICAP_ID for INT_MSI_SINGLE and INT_MSI_MULTI.
 *
 * @pdev: PCI device to inspect
 * @cap_type: interrupt scheme whose capability is wanted
 * @offset: written with the config-space offset of the capability on
 *          success; left untouched otherwise
 *
 * Returns SUCCESS when the capability is found, -EINVAL when it is not,
 * when cap_type is not one of the three types above, when a configuration
 * read fails, or when the list is longer than a valid list can be.
 */
int check_cntlr_cap(struct pci_dev *pdev, enum nvme_irq_type cap_type,
    u16 *offset)
{
    /*
     * Upper bound on list entries that are followed. 256 bytes of
     * configuration space hold at most 64 dword-aligned capabilities, so a
     * longer walk means the list is malformed (e.g. cyclic) and would
     * otherwise never terminate.
     */
    enum { CAP_LIST_MAX_HOPS = 64 };
    u16 val = 0;
    u16 pci_offset = 0;
    int cap_id;
    int hops = 0;
    int ret_val = -EINVAL;

    /* The config accessors report failure as a non-zero (positive) code,
     * so test against 0 rather than for a negative value. */
    if (pci_read_config_word(pdev, PCI_DEVICE_STATUS, &val) != 0) {
        LOG_ERR("pci_read_config failed...");
        return -EINVAL;
    }
    LOG_DBG("PCI_DEVICE_STATUS = 0x%X", val);

    if (!(val & CL_MASK)) {
        LOG_ERR("Controller does not support Capability list...");
        return -EINVAL;
    }
    if (pci_read_config_word(pdev, CAP_REG, &pci_offset) != 0) {
        LOG_ERR("pci_read_config failed...");
        return -EINVAL;
    }

    /* Pick the capability ID that corresponds to the requested scheme */
    if (cap_type == INT_MSIX) {
        cap_id = MSIXCAP_ID;
    } else if (cap_type == INT_MSI_SINGLE || cap_type == INT_MSI_MULTI) {
        cap_id = MSICAP_ID;
    } else {
        LOG_ERR("Invalid capability type specified...");
        return -EINVAL;
    }

    /* Loop through Capability list; an offset of 0 terminates it */
    while (pci_offset) {
        if (hops++ >= CAP_LIST_MAX_HOPS) {
            LOG_ERR("Capability list too long or cyclic...");
            return -EINVAL;
        }
        if (pci_read_config_word(pdev, pci_offset, &val) != 0) {
            LOG_ERR("pci_read_config failed...");
            return -EINVAL;
        }
        /* The word holds the capability ID plus the next-pointer field */
        if ((val & ~NEXT_MASK) == cap_id) {
            *offset = pci_offset;
            ret_val = SUCCESS;
            break;
        }
        /* Next Capability offset. */
        pci_offset = (val & NEXT_MASK) >> 8;
    }

    return ret_val;
}
/*
 * validate_irq_inputs - sanity-check a requested interrupt scheme.
 *
 * Rejects the request when CC.EN is set, when the number of vectors is out
 * of range for the scheme, or when the controller lacks the matching
 * capability. As a side effect the mask pointer of the device's irq_process
 * is set for the MSI schemes (to BAR0 + INTMS_OFFSET) and, through
 * update_msixptr(), for MSI-X; for MSI-X the table size is also stored in
 * pmsix_tbl_info->ts.
 *
 * Returns SUCCESS when the request is acceptable (INT_NONE always is),
 * -EINVAL otherwise.
 */
static int validate_irq_inputs(struct metrics_device_list
    *pmetrics_device_elem, struct interrupts *irq_new,
    struct msix_info *pmsix_tbl_info)
{
    struct nvme_device *nvme_dev = pmetrics_device_elem->metrics_device;
    struct pci_dev *pci = pmetrics_device_elem->metrics_device->
        private_dev.pdev;
    u16 cap_off;
    u16 msg_ctrl;
    int mme_vecs;

    /* The scheme may only change while the controller is disabled */
    if (readl(&nvme_dev->private_dev.ctrlr_regs->cc) & NVME_CC_ENABLE) {
        LOG_ERR("IRQ Scheme cannot change when CC.EN bit is set!!");
        LOG_ERR("Call Disable or Disable completely first...");
        return -EINVAL;
    }

    switch (irq_new->irq_type) {
    case INT_MSI_SINGLE:
        /* Exactly MAX_IRQ_VEC_MSI_SIN vectors are accepted */
        if (irq_new->num_irqs != MAX_IRQ_VEC_MSI_SIN) {
            LOG_ERR("IRQ vectors cannot be greater/equal %d in MSI Single IRQ",
                MAX_IRQ_VEC_MSI_SIN);
            return -EINVAL;
        }
        /* The controller has to expose the MSI capability */
        if (check_cntlr_cap(pci, INT_MSI_SINGLE, &cap_off) < 0) {
            LOG_ERR("Controller does not support for MSI capability!!");
            return -EINVAL;
        }
        /* MSI masking goes through the INTMS/INTMC registers in BAR0 */
        pmetrics_device_elem->irq_process.mask_ptr =
            pmetrics_device_elem->metrics_device->private_dev.bar0 +
            INTMS_OFFSET;
        break;

    case INT_MSI_MULTI:
        if (irq_new->num_irqs == 0 ||
            irq_new->num_irqs > MAX_IRQ_VEC_MSI_MUL) {
            LOG_ERR("IRQ vectors cannot be greater/equal %d in MSI Multi IRQ",
                MAX_IRQ_VEC_MSI_MUL);
            return -EINVAL;
        }
        /* The controller has to expose the MSI capability */
        if (check_cntlr_cap(pci, INT_MSI_MULTI, &cap_off) < 0) {
            LOG_ERR("Controller does not support for MSI capability!!");
            return -EINVAL;
        }
        /* The MSI message control word sits 2 bytes into the capability */
        cap_off += 2;
        pci_read_config_word(pci, cap_off, &msg_ctrl);
        /* The MSI_MME field encodes the vector count as a power of two */
        mme_vecs = (1 << ((msg_ctrl & MSI_MME) >> 4));
        if (irq_new->num_irqs > mme_vecs) {
            LOG_ERR("IRQs = %d exceed MSI MME = %d", irq_new->num_irqs,
                mme_vecs);
            return -EINVAL;
        }
        /* MSI masking goes through the INTMS/INTMC registers in BAR0 */
        pmetrics_device_elem->irq_process.mask_ptr =
            pmetrics_device_elem->metrics_device->private_dev.bar0 +
            INTMS_OFFSET;
        break;

    case INT_MSIX:
        /* Bound the request by the driver-wide MSI-X maximum first */
        if (irq_new->num_irqs == 0 ||
            irq_new->num_irqs > MAX_IRQ_VEC_MSI_X) {
            LOG_ERR("IRQ vectors cannot be greater/equal %d in MSI-X IRQ",
                MAX_IRQ_VEC_MSI_X);
            return -EINVAL;
        }
        /* The controller has to expose the MSI-X capability */
        if (check_cntlr_cap(pci, INT_MSIX, &cap_off) < 0) {
            LOG_ERR("Controller does not support for MSI-X capability!!");
            return -EINVAL;
        }
        /* Resolve the MSI-X table and PBA pointers for this device */
        if (update_msixptr(pmetrics_device_elem, cap_off, pmsix_tbl_info)
            < 0) {
            return -EINVAL;
        }
        /* The MSI-X message control word sits 2 bytes into the capability */
        cap_off += 2;
        pci_read_config_word(pci, cap_off, &msg_ctrl);
        pmsix_tbl_info->ts = (msg_ctrl & MSIX_TS);
        /* TS is 0 based while num_irqs is 1 based, hence the + 1 */
        if (irq_new->num_irqs > (pmsix_tbl_info->ts + 1)) {
            LOG_ERR("IRQs = %d exceed MSI-X table size = %d", irq_new->
                num_irqs, pmsix_tbl_info->ts);
            return -EINVAL;
        }
        break;

    case INT_NONE:
        /* Nothing to validate when interrupts are being switched off */
        break;

    default:
        /* Any other scheme is rejected */
        LOG_ERR("No validation for default case..");
        return -EINVAL;
    }

    return SUCCESS;
}
/*
 * set_msix - make MSI-X the active interrupt scheme.
 *
 * Enables num_irqs MSI-X vectors (entries 0 .. num_irqs - 1), and for each
 * vector registers tophalf_isr, adds an irq track node and adds a work item.
 * The admin CQ (id 0) is then attached to irq node 0, the pending bit array
 * is checked to be clear for every requested vector, and finally each
 * vector is unmasked in the MSI-X table.
 *
 * @pmetrics_device_elem: device being configured
 * @num_irqs: number of vectors to enable
 * @pmsix_tbl_info: MSI-X table and PBA pointers for the device
 *
 * Returns 0 on success and -ve or +ve values on error (a non-zero result of
 * pci_enable_msix() is returned as is). Failures after MSI-X was enabled are
 * cleaned up through disable_active_irq().
 */
static int set_msix(struct metrics_device_list *pmetrics_device_elem,
    u16 num_irqs, struct msix_info *pmsix_tbl_info)
{
    int ret_val, i, j, tmp_irq;
    u32 regVal;
    /* Function-static storage: one table shared by every call.
     * NOTE(review): presumably static to keep it off the kernel stack;
     * this makes concurrent calls for different devices unsafe - confirm
     * callers are serialized. */
    static struct msix_entry msix_entries[MAX_IRQ_VEC_MSI_X];
    struct pci_dev *pdev = pmetrics_device_elem->metrics_device->
        private_dev.pdev;
    struct irq_track *pirq_node;

    memset(msix_entries, 0, sizeof(struct msix_entry) * MAX_IRQ_VEC_MSI_X);

    /* Assign irq entries from 0 to n-1 */
    for (i = 0; i < num_irqs; i++) {
        msix_entries[i].entry = i;
    }

    /* Allocate msix interrupts to this device */
    ret_val = pci_enable_msix(pdev, msix_entries, num_irqs);
    if (ret_val) {
        LOG_ERR("Can't enable MSI-X");
        return ret_val;
    }

    /* Request irq on each interrupt vector */
    for (i = 0; i < num_irqs; i++) {
        /* If request fails on any interrupt vector then fail here */
        ret_val = request_irq(msix_entries[i].vector, tophalf_isr,
            IRQF_DISABLED | IRQF_SHARED, "msi-x",
            &pmetrics_device_elem->irq_process);
        if (ret_val < 0) {
            LOG_ERR("MSI-X-Err: request irq failed for ivec= %u",
                msix_entries[i].vector);
            /* As we are allocating memory for one node at a time
             * failing here needs freeing up memory previously allocated */
            goto free_msix;
        }

        /* Add node after determining interrupt vector req is successful */
        LOG_DBG("Add Node for Vector = %d", msix_entries[i].vector);
        ret_val = add_irq_node(pmetrics_device_elem, msix_entries[i].vector,
            msix_entries[i].entry);
        if (ret_val < 0) {
            LOG_ERR("MSI-X-Err: can't add irq node");
            goto free_msix;
        }

        /* Every vector also gets its own work item for the bottom half */
        LOG_DBG("Add Wk item node for Vector = %d", msix_entries[i].vector);
        ret_val = add_wk_item(&pmetrics_device_elem->irq_process,
            msix_entries[i].vector, msix_entries[i].entry);
        if (ret_val < 0) {
            LOG_ERR("MSI-X-Err: can't add work item node");
            goto free_msix;
        }
    }

    /* fetch the Irq node 0 */
    pirq_node = find_irq_node(pmetrics_device_elem, 0);
    if (pirq_node == NULL) {
        LOG_ERR("Can't find CQ 0 node inside irq_track list");
        ret_val = -EINVAL;
        goto free_msix;
    }

    /* Add the default ACQ node into the default irq node */
    ret_val = add_icq_node(pirq_node, 0);
    if (ret_val < 0) {
        LOG_ERR("Can't add CQ 0 node inside irq_track list");
        goto free_msix;
    }

    /*
     * Check for consistency of IRQ setup: the PBA is read one 32-bit word
     * at a time and no pending bit may be set for a requested vector.
     * tmp_irq tracks how many requested vectors remain from the current
     * word onwards.
     */
    tmp_irq = num_irqs;
    for (i = 0; i < (((num_irqs - 1) / 32) + 1); i++) {
        regVal = readl((u32*)pmsix_tbl_info->pba_tbl + i);
        for (j = 0; (j < 32) && (j < tmp_irq); j++) {
            /* Unsigned constant: shifting a signed 1 into bit 31 is
             * undefined behavior */
            if (regVal & (1U << j)) {
                LOG_ERR("PBA bit is set at IRQ init, nothing should be set");
                ret_val = -EINVAL;
                goto free_msix;
            }
        }
        tmp_irq -= 32;
    }

    /* Unmask each irq vector's mask bit in msix table */
    for (i = 0; i < num_irqs; i++) {
        set_msix_mask_bit(pmsix_tbl_info->msix_tbl, i, 0);
    }
    return ret_val;

free_msix:
    disable_active_irq(pmetrics_device_elem, pmetrics_device_elem->
        metrics_device->public_dev.irq_active.irq_type);
    return ret_val;
}
/*
 * set_msi_single - make single-vector MSI the active interrupt scheme.
 *
 * Clears all interrupt masks by writing all 1's to INTMC (mask_ptr + 0x04),
 * enables MSI (after which pdev->irq holds the MSI vector), registers
 * tophalf_isr on that vector, adds irq track node 0 and its work item, and
 * attaches the admin CQ (id 0) to irq node 0.
 *
 * Returns 0 on success and -ve or +ve values on error (a non-zero result of
 * pci_enable_msi() is returned as is). If request_irq() fails, MSI is
 * disabled again before returning; later failures are cleaned up through
 * disable_active_irq().
 */
static int set_msi_single(struct metrics_device_list *pmetrics_device_elem)
{
    struct pci_dev *pdev = pmetrics_device_elem->metrics_device->
        private_dev.pdev;
    int ret_val;
    struct irq_track *pirq_node;

    /* Clear all the interrupts for MSI Single by writing to INTMC */
    writel(UINT_MAX, (pmetrics_device_elem->irq_process.mask_ptr + 0x04));

    /*
     * Allocate one interrupt to this device. A successful call will switch the
     * device to msi single mode and pdev->irq is changed to a new number which
     * represents msi interrupt vector consequently.
     */
    ret_val = pci_enable_msi(pdev);
    if (ret_val) {
        LOG_ERR("Can't enable msi ");
        return ret_val;
    }

    /* request irq with top half handler and int vec stored in pdev->irq. */
    ret_val = request_irq(pdev->irq, tophalf_isr, IRQF_DISABLED
        | IRQF_SHARED, "msi-single", &pmetrics_device_elem->irq_process);
    if (ret_val < 0) {
        LOG_ERR("Request irq failed for ivec= %i", pdev->irq);
        /* No handler was installed, so the only thing to undo is the
         * pci_enable_msi() above; without this the device would be left
         * in MSI mode while the driver records no active scheme. */
        pci_disable_msi(pdev);
        return ret_val;
    }
    LOG_DBG("MSI-Single Interrupt Vector = %d", pdev->irq);

    /* Add node after determining interrupt vector is successful */
    ret_val = add_irq_node(pmetrics_device_elem, pdev->irq, 0);
    if (ret_val < 0) {
        LOG_ERR("Can't add irq node");
        goto free_msis;
    }

    ret_val = add_wk_item(&pmetrics_device_elem->irq_process, pdev->irq, 0);
    if (ret_val < 0) {
        LOG_ERR("Can't add work item node");
        goto free_msis;
    }

    /* fetch the Irq node 0 */
    pirq_node = find_irq_node(pmetrics_device_elem, 0);
    if (pirq_node == NULL) {
        LOG_ERR("Can't find CQ 0 node inside irq_track list");
        ret_val = -EINVAL;
        goto free_msis;
    }

    /* Add the default ACQ node into the default irq node */
    ret_val = add_icq_node(pirq_node, 0);
    if (ret_val < 0) {
        LOG_ERR("Can't add CQ 0 node inside irq_track list");
        goto free_msis;
    }
    return ret_val;

free_msis:
    disable_active_irq(pmetrics_device_elem, pmetrics_device_elem->
        metrics_device->public_dev.irq_active.irq_type);
    return ret_val;
}
/*
 * dnvme_pci_enable_msi - enable exactly nvec MSI vectors on a device.
 *
 * Kernels up to and including 3.14.0 use pci_enable_msi_block(); newer ones
 * use pci_enable_msi_range() with the minimum and maximum both set to nvec.
 *
 * Returns 0 when nvec vectors were allocated, a negative errno on error,
 * and a positive value when fewer than nvec vectors were available.
 */
static int dnvme_pci_enable_msi(struct pci_dev *dev, unsigned int nvec)
{
#if LINUX_VERSION_CODE <= KERNEL_VERSION(3,14,0)
    return pci_enable_msi_block(dev, nvec);
#else
    int ret;

    ret = pci_enable_msi_range(dev, nvec, nvec);
    /*
     * Test the sign before comparing with the unsigned nvec: a negative
     * errno converted to unsigned is a huge value, so "ret < nvec" alone
     * would be false and the failure would be reported as success.
     */
    if (ret < 0 || (unsigned int)ret < nvec)
        return ret;
    return 0;
#endif
}
/*
 * set_msi_multi - make multi-vector MSI the active interrupt scheme.
 *
 * Clears all interrupt masks by writing all 1's to INTMC (mask_ptr + 0x04),
 * enables a block of num_irqs MSI vectors and then, for each vector from
 * pdev->irq to pdev->irq + num_irqs - 1, registers tophalf_isr, appends an
 * irq track node and appends a work item. Finally the admin CQ (id 0) is
 * attached to irq node 0.
 *
 * Returns 0 on success and -ve or +ve values on error. Failures after the
 * MSI block was enabled are cleaned up through disable_active_irq().
 */
static int set_msi_multi(struct metrics_device_list *pmetrics_device_elem,
    u16 num_irqs)
{
    struct irq_processing *irq_proc = &pmetrics_device_elem->irq_process;
    struct pci_dev *pci = pmetrics_device_elem->metrics_device->
        private_dev.pdev;
    struct irq_track *node0;
    u32 vec;
    int idx;
    int rc;

    /* Unmask everything up front by writing all 1's to INTMC */
    writel(UINT_MAX, (irq_proc->mask_ptr + 0x04));

    /* After a successful enable pdev->irq is the lowest vector of the
     * block assigned to this device. */
    rc = dnvme_pci_enable_msi(pci, num_irqs);
    if (rc) {
        LOG_ERR("Can't enable MSI-Multi with num_irq=%d", num_irqs);
        return rc;
    }

    for (idx = 0; idx < num_irqs; idx++) {
        vec = pci->irq + idx;

        /* Hook the top half onto this vector; bail out on first failure */
        rc = request_irq(vec, tophalf_isr, IRQF_DISABLED | IRQF_SHARED,
            "msi-multi", irq_proc);
        if (rc < 0) {
            LOG_ERR("Request IRQ failed ivec = %d", vec);
            /* Nodes added for earlier vectors must be released again */
            goto undo;
        }

        /* Track the vector once the handler is in place */
        LOG_DBG("Add Node for Vector = %d", vec);
        rc = add_irq_node(pmetrics_device_elem, vec, idx);
        if (rc < 0) {
            LOG_ERR("Can't add irq node");
            goto undo;
        }

        /* Matching work item for the bottom half */
        LOG_DBG("Add Wk item node for Vector = %d", vec);
        rc = add_wk_item(irq_proc, vec, idx);
        if (rc < 0) {
            LOG_ERR("Can't add work item node");
            goto undo;
        }
    }

    /* The admin CQ is serviced by irq number 0 */
    node0 = find_irq_node(pmetrics_device_elem, 0);
    if (node0 == NULL) {
        LOG_ERR("Can't find CQ 0 node inside irq_track list");
        rc = -EINVAL;
        goto undo;
    }

    rc = add_icq_node(node0, 0);
    if (rc < 0) {
        LOG_ERR("Can't add CQ 0 node inside irq_track list");
        goto undo;
    }
    return rc;

undo:
    disable_active_irq(pmetrics_device_elem, pmetrics_device_elem->
        metrics_device->public_dev.irq_active.irq_type);
    return rc;
}
/*
 * update_msixptr - locate the MSI-X table and pending bit array.
 *
 * Reads MSIXCAP.MTAB (capability offset + 4) and MSIXCAP.MPBA (capability
 * offset + 8), splits each into its BAR indicator and offset fields, and
 * turns them into pointers inside the mapped BAR. Only indicator values
 * 0x00 (mapped through private_dev.bar0) and 0x04 (mapped through
 * private_dev.bar2, which must be non-NULL) are accepted.
 *
 * On success the table pointer is stored both as the irq_process mask
 * pointer and in pmsix_tbl_info->msix_tbl, and the PBA pointer in
 * pmsix_tbl_info->pba_tbl. Nothing is stored on failure.
 *
 * Returns SUCCESS, or -EINVAL for an unsupported or unmapped BAR.
 */
static int update_msixptr(struct metrics_device_list *pmetrics_device_elem,
    u16 offset, struct msix_info *pmsix_tbl_info)
{
    struct nvme_device *nvme_dev = pmetrics_device_elem->metrics_device;
    struct pci_dev *pci = nvme_dev->private_dev.pdev;
    u8 __iomem *tbl_base = NULL;
    u8 __iomem *pba_base = NULL;
    u32 mtab;       /* MSIXCAP.MTAB register */
    u32 mpba;       /* MSIXCAP.MPBA register */
    u32 tbl_bir;    /* MSIXCAP.MTAB.TBIR field */
    u32 tbl_off;    /* MSIXCAP.MTAB.TO field */
    u32 pba_bir;    /* MSIXCAP.MPBA.PBIR field */
    u32 pba_off;    /* MSIXCAP.MPBA.PBAO field */

    /* MTAB lives 4 bytes into the capability */
    offset += 0x4;
    pci_read_config_dword(pci, offset, &mtab);
    tbl_bir = (mtab & MSIX_TBIR);
    tbl_off = (mtab & ~MSIX_TBIR);

    /* MPBA follows another 4 bytes later */
    offset += 0x4;
    pci_read_config_dword(pci, offset, &mpba);
    pba_bir = (mpba & MSIX_PBIR);
    pba_off = (mpba & ~MSIX_PBIR);

    /* Resolve the MSI-X table location */
    if (tbl_bir == 0x00) {
        /* BAR0 (64-bit) */
        tbl_base = (nvme_dev->private_dev.bar0 + tbl_off);
    } else if (tbl_bir == 0x04) {
        /* BAR2 (64-bit) */
        if (nvme_dev->private_dev.bar2 == NULL) {
            LOG_ERR("BAR2 not implemented by DUT");
            return -EINVAL;
        }
        tbl_base = (nvme_dev->private_dev.bar2 + tbl_off);
    } else if (tbl_bir == 0x05) {
        LOG_ERR("BAR5 not supported, implies 32-bit, TBIR requiring 64-bit");
        return -EINVAL;
    } else {
        LOG_ERR("BAR? not supported, check value in MSIXCAP.MTAB.TBIR");
        return -EINVAL;
    }

    /* Resolve the pending bit array location */
    if (pba_bir == 0x00) {
        /* BAR0 (64-bit) */
        pba_base = (nvme_dev->private_dev.bar0 + pba_off);
    } else if (pba_bir == 0x04) {
        /* BAR2 (64-bit) */
        if (nvme_dev->private_dev.bar2 == NULL) {
            LOG_ERR("BAR2 not implemented by DUT");
            return -EINVAL;
        }
        pba_base = (nvme_dev->private_dev.bar2 + pba_off);
    } else if (pba_bir == 0x05) {
        LOG_ERR("BAR5 not supported, implies 32-bit, MPBA requiring 64-bit");
        return -EINVAL;
    } else {
        LOG_ERR("BAR? not supported, check value in MSIXCAP.MPBA.PBIR");
        return -EINVAL;
    }

    /* Publish the pointers only once both lookups succeeded */
    pmetrics_device_elem->irq_process.mask_ptr = tbl_base;
    pmsix_tbl_info->msix_tbl = tbl_base;
    pmsix_tbl_info->pba_tbl = pba_base;
    return SUCCESS;
}
/*
 * set_msix_mask_bit - write the vector control word of one MSI-X entry.
 *
 * @irq_msixptr: start of the MSI-X table
 * @irq_no: table entry whose vector control word is written
 * @flag: value written to the vector control word; per the callers in this
 *        file 0x1 masks the vector and 0x0 unmasks it
 *
 * The write is followed by a read of the same location to flush it.
 */
static void set_msix_mask_bit(u8 __iomem *irq_msixptr, u16 irq_no, u32 flag)
{
    /* Vector control word of entry irq_no inside the table */
    u8 __iomem *vec_ctrl =
        irq_msixptr + (MSIX_VEC_CTRL + irq_no * MSIX_ENTRY_SIZE);

    writel(flag, vec_ctrl);
    /* Read back so the posted write reaches the device */
    readl(vec_ctrl);
}
/*
* mask_interrupts - Determine the type of masking required based
* on the interrupt scheme active. Mask interrupts for MSI-Single,
* MSI- Multi and MSIX
*/
void mask_interrupts(u16 irq_no, struct irq_processing *pirq_process)
{
    /*
     * irq_no       - interrupt vector number to mask
     * pirq_process - irq context; supplies the active irq_type and the
     *                mask_ptr used to reach the masking register/table
     */
    switch (pirq_process->irq_type) {
    case INT_MSI_SINGLE: /* Same as MSI MULTI */
    case INT_MSI_MULTI: /* Masking MSI interrupt in INTMS register */
        /*
         * The mask is written as one 32-bit value, so only bit
         * positions 0..31 exist. Reject anything larger instead of
         * performing an undefined shift.
         */
        if (irq_no >= 32) {
            LOG_ERR("irq_no = %d exceeds 32-bit INTMS register", irq_no);
            break;
        }
        /*
         * Use an unsigned constant: shifting a signed 1 into bit 31
         * is undefined behaviour in C.
         */
        writel((0x1U << irq_no), pirq_process->mask_ptr);
        /* Flush the write */
        readl(pirq_process->mask_ptr);
        break;
    case INT_MSIX: /* Masking for MSI-X using vector control */
        set_msix_mask_bit(pirq_process->mask_ptr, irq_no, 0x1);
        break;
    case INT_NONE:
        /* No interrupt scheme active, nothing to mask */
        break;
    default:
        LOG_ERR("Unknown interrupt type to Mask...");
        break;
    }
}
/*
 * unmask_interrupts - Determine the type of interrupt scheme and
 * unmask interrupts for MSI-Single, MSI-Multi and MSI-X.
 */
void unmask_interrupts(u16 irq_no, struct irq_processing *pirq_process)
{
    /*
     * irq_no       - interrupt vector number to unmask
     * pirq_process - irq context; supplies the active irq_type and the
     *                mask_ptr used to reach the masking register/table
     */
    switch (pirq_process->irq_type) {
    case INT_MSI_SINGLE:
    case INT_MSI_MULTI:
        /*
         * The clear mask is written as one 32-bit value, so only bit
         * positions 0..31 exist. Reject anything larger instead of
         * performing an undefined shift.
         */
        if (irq_no >= 32) {
            LOG_ERR("irq_no = %d exceeds 32-bit INTMC register", irq_no);
            break;
        }
        /*
         * unMask via the INTMC register, located 0x04 past mask_ptr.
         * Unsigned constant: shifting a signed 1 into bit 31 is
         * undefined behaviour in C.
         */
        writel((0x1U << irq_no), (pirq_process->mask_ptr + 0x04));
        /* Flush the write */
        readl(pirq_process->mask_ptr + 0x04);
        break;
    case INT_MSIX: /* unmask for MSI-X using vector control */
        set_msix_mask_bit(pirq_process->mask_ptr, irq_no, 0x0);
        break;
    case INT_NONE:
        /* No interrupt scheme active, nothing to unmask */
        break;
    default:
        /* Message names the unmask path so log readers are not misled */
        LOG_ERR("Unknown interrupt type to Unmask...");
        break;
    }
}
/*
* Top half isr responds to the interrupt by masking the corresponding irq
* vector if required and scheduling the bottom half processing.
*/
irqreturn_t tophalf_isr(int int_vec, void *dev_id)
{
    /* dev_id carries the irq_processing context for this handler */
    struct irq_processing *ctx = (struct irq_processing *) dev_id;
    /* Work item registered for this interrupt vector, if any */
    struct work_container *item = get_work_item(ctx, int_vec);

    if (item == NULL) {
        /* No work item is known for this vector: not ours to handle */
        LOG_ERR("spurious irq with int_vec = %d", int_vec);
        return IRQ_NONE;
    }

    /* Serialize against ISRs that fire concurrently on other cores */
    spin_lock(&ctx->isr_spin_lock);

    LOG_DBG("TH:IRQNO = %d is serviced", item->irq_no);

    /* Keep the fired vector masked until the bottom half has run */
    mask_interrupts(item->irq_no, ctx);

    /* Hand the remaining processing over to the work queue */
    if (!queue_work(ctx->wq, &item->sched_wq)) {
        LOG_ERR("Work item already in Queue");
    }

    /* End of critical section */
    spin_unlock(&ctx->isr_spin_lock);

    return IRQ_HANDLED;
}
/*
 * inc_isr_count:
 * Search the irq track list for the node whose irq_no matches the IRQ
 * that fired, set its isr_fired flag and increment its isr_count by one.
 */
static void inc_isr_count(struct irq_processing *pirq_process,
    u16 irq_no)
{
    struct irq_track *node; /* Cursor over the irq track list */

    /* Visit every tracked irq node; only matching ones are updated */
    list_for_each_entry(node, &pirq_process->irq_track_list,
        irq_list_hd) {
        if (node->irq_no != irq_no) {
            continue;
        }
        /* Record that this irq fired and bump its counter */
        node->isr_fired = 1;
        node->isr_count++;
        LOG_DBG("BH:isr count = %d for irq no = %d",
            node->isr_count, node->irq_no);
    }
}
/*
 * This is the work item that gets scheduled when the ISR fires.
 * The bottom halves are queued up, so we need not worry about multiple
 * interrupts firing, as we store them in an array used as a lookup
 * table.
 */
static void bh_callback(struct work_struct *work_element)
{
    /* Recover the enclosing work_container from its embedded work_struct */
    struct work_container *item =
        container_of(work_element, struct work_container, sched_wq);

    /* The irq nodes are about to be touched, so take the irq track mutex */
    mutex_lock(&item->pirq_process->irq_track_mtx);

    /* Flag the irq as fired and increment its count */
    inc_isr_count(item->pirq_process, item->irq_no);

    /* Finished updating the irq nodes */
    mutex_unlock(&item->pirq_process->irq_track_mtx);

    LOG_DBG("BH:IRQNO = %d is serviced in Bottom Half", item->irq_no);
}
/*
* work_queue_init:
* Is used for setting up memory for work queue
*/
int work_queue_init(struct irq_processing *pirq_process)
{
    /*
     * A work queue left over from a previous run of the IOCTL is
     * reused as is; nothing more to do in that case.
     */
    if (pirq_process->wq != NULL) {
        return SUCCESS;
    }

    /* No queue yet: create one and remember it in the irq context */
    pirq_process->wq = create_workqueue("work_queue");
    if (pirq_process->wq == NULL) {
        LOG_ERR("Failed work queue creation");
        return -ENOMEM;
    }

    return SUCCESS;
}
/*
 * Add an irq track node to the irq_track linked list. Allocates memory for
 * one irq_track node, sets up the values for this node. Initialize the
 * CQ_track node for this irq_node then add the irq_node to the irq_track
 * linked list.
 */